diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000000000000000000000000000000000000..7ae7fa1c5e1b1f8ea0b30dffb6bb23d7b18b5763 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,68 @@ +{ + "permissions": { + "allow": [ + "Bash(git checkout:*)", + "WebFetch(domain:viser.studio)", + "WebSearch", + "WebFetch(domain:github.com)", + "mcp__plugin_context7_context7__resolve-library-id", + "mcp__plugin_context7_context7__query-docs", + "Bash(python -c:*)", + "Bash(uv add:*)", + "Bash(uv:*)", + "Bash(grep:*)", + "Bash(nvidia-smi:*)", + "Bash(nvcc:*)", + "Bash(where:*)", + "Bash(gcc:*)", + "Bash(cl)", + "Bash(python:*)", + "Bash(DISTUTILS_USE_SDK=1 uv pip install:*)", + "Bash(curl:*)", + "Bash(export PATH=\"/c/Program Files/Microsoft Visual Studio/2022/Community/VC/Tools/MSVC/14.44.35207/bin/Hostx64/x64:$PATH\")", + "Bash(git submodule:*)", + "Bash(set \"PATH=C:\\\\Program Files\\\\Microsoft Visual Studio\\\\2022\\\\Community\\\\VC\\\\Tools\\\\MSVC\\\\14.42.34433\\\\bin\\\\Hostx64\\\\x64;%PATH%\")", + "Bash(set \"ATTN_BACKEND=xformers\")", + "Bash(cmd /c \"set PATH=C:\\\\Program Files\\\\Microsoft Visual Studio\\\\2022\\\\Community\\\\VC\\\\Tools\\\\MSVC\\\\14.42.34433\\\\bin\\\\Hostx64\\\\x64;%PATH% && set ATTN_BACKEND=xformers && uv run python visualize_flow.py --image assets/example_image/T.png\")", + "Bash(powershell -Command \"$env:ATTN_BACKEND=''xformers''; $env:PATH=''C:\\\\Program Files\\\\Microsoft Visual Studio\\\\2022\\\\Community\\\\VC\\\\Tools\\\\MSVC\\\\14.42.34433\\\\bin\\\\Hostx64\\\\x64;'' + $env:PATH; uv run python visualize_flow.py --image assets/example_image/T.png\")", + "Bash(timeout:*)", + "Bash(.venvScriptspython.exe -c \"from huggingface_hub import whoami; print\\(whoami\\(\\)\\)\")", + "Bash(.venv/Scripts/python.exe:*)", + "Bash(.venv/Scripts/pip.exe install:*)", + "Bash(cd:*)", + "Bash(ping:*)", + "Bash(conda activate:*)", + "Bash(pkill:*)", + "Bash(tasklist:*)", + "Bash(wmic OS get:*)", 
+ "Bash(powershell:*)", + "Bash(dir /b /s \"C:\\\\Users\\\\opsiclear\\\\Desktop\\\\projects\\\\Trellis2_multi_image_conditioning\\\\trellis2\\\\pipelines\"\")", + "Bash(findstr:*)", + "Bash(netstat:*)", + "Bash(taskkill:*)", + "Bash(git add:*)", + "Bash(git commit:*)", + "Bash(git push:*)", + "Bash(gh auth:*)", + "Bash(git config:*)", + "Bash(git ls-tree:*)", + "Bash(ls:*)", + "Bash(wc:*)", + "Bash(git rm:*)", + "Bash(git clone:*)", + "Bash(huggingface-cli upload:*)", + "Bash(pip install:*)", + "Bash(\"C:/Users/opsiclear/AppData/Local/Packages/PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0/LocalCache/local-packages/Python312/Scripts/hf.exe\" upload OpsiClear/Trellis.2.multi-image \"C:/Users/opsiclear/Desktop/projects/Trellis.2.multi-image\" . --repo-type=space)", + "Bash(\"C:/Users/opsiclear/AppData/Local/Packages/PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0/LocalCache/local-packages/Python312/Scripts/hf.exe\" login)", + "Bash(\"C:/Users/opsiclear/AppData/Local/Packages/PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0/LocalCache/local-packages/Python312/Scripts/hf.exe\" --help)", + "Bash(\"C:/Users/opsiclear/AppData/Local/Packages/PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0/LocalCache/local-packages/Python312/Scripts/hf.exe\" auth --help)", + "Bash(\"C:/Users/opsiclear/AppData/Local/Packages/PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0/LocalCache/local-packages/Python312/Scripts/hf.exe\" auth whoami)", + "Bash(C:UsersopsiclearAppDataRoamingPythonPython310Scriptshuggingface-cli.exe repo info spaces/OpsiClear/Trellis.2.multi-image)", + "Bash(\"C:\\\\Users\\\\opsiclear\\\\AppData\\\\Roaming\\\\Python\\\\Python310\\\\Scripts\\\\hf.exe\" upload spaces/OpsiClear/Trellis.2.multi-image README.md --commit-message \"Add suggested_hardware: a100-large for GPU support\")", + "Bash(..venvScriptspython.exe app_local.py)", + "Bash(pip show:*)", + "Bash(huggingface-cli whoami:*)", + "Bash(git remote add:*)" + ] + } +} diff --git a/.gitattributes 
b/.gitattributes index 8ad10c3b833597eb4d60f464758834c34d1fcfcf..e2c84149d783f14322c85ce9f64022b5c28e928e 100644 --- a/.gitattributes +++ b/.gitattributes @@ -132,3 +132,26 @@ assets/hdri/night.exr filter=lfs diff=lfs merge=lfs -text assets/hdri/sunrise.exr filter=lfs diff=lfs merge=lfs -text assets/hdri/sunset.exr filter=lfs diff=lfs merge=lfs -text assets/teaser.webp filter=lfs diff=lfs merge=lfs -text +o-voxel/assets/overview.webp filter=lfs diff=lfs merge=lfs -text +o-voxel/build/temp.win-amd64-cpython-311/Release/.ninja_deps filter=lfs diff=lfs merge=lfs -text +o-voxel/build/temp.win-amd64-cpython-311/Release/src/convert/flexible_dual_grid.obj filter=lfs diff=lfs merge=lfs -text +o-voxel/build/temp.win-amd64-cpython-311/Release/src/convert/volumetic_attr.obj filter=lfs diff=lfs merge=lfs -text +o-voxel/build/temp.win-amd64-cpython-311/Release/src/ext.obj filter=lfs diff=lfs merge=lfs -text +o-voxel/build/temp.win-amd64-cpython-311/Release/src/hash/hash.obj filter=lfs diff=lfs merge=lfs -text +o-voxel/build/temp.win-amd64-cpython-311/Release/src/io/filter_neighbor.obj filter=lfs diff=lfs merge=lfs -text +o-voxel/build/temp.win-amd64-cpython-311/Release/src/io/filter_parent.obj filter=lfs diff=lfs merge=lfs -text +o-voxel/build/temp.win-amd64-cpython-311/Release/src/io/svo.obj filter=lfs diff=lfs merge=lfs -text +o-voxel/build/temp.win-amd64-cpython-311/Release/src/rasterize/rasterize.obj filter=lfs diff=lfs merge=lfs -text +o-voxel/build/temp.win-amd64-cpython-311/Release/src/serialize/api.obj filter=lfs diff=lfs merge=lfs -text +outputs/step_meshes/step_000.glb filter=lfs diff=lfs merge=lfs -text +outputs/step_meshes/step_001.glb filter=lfs diff=lfs merge=lfs -text +outputs/step_meshes/step_002.glb filter=lfs diff=lfs merge=lfs -text +outputs/step_meshes/step_003.glb filter=lfs diff=lfs merge=lfs -text +outputs/step_meshes/step_004.glb filter=lfs diff=lfs merge=lfs -text +outputs/step_meshes/step_005.glb filter=lfs diff=lfs merge=lfs -text 
+outputs/step_meshes/step_006.glb filter=lfs diff=lfs merge=lfs -text +outputs/step_meshes/step_007.glb filter=lfs diff=lfs merge=lfs -text +outputs/step_meshes/step_008.glb filter=lfs diff=lfs merge=lfs -text +outputs/step_meshes/step_009.glb filter=lfs diff=lfs merge=lfs -text +outputs/step_meshes/step_010.glb filter=lfs diff=lfs merge=lfs -text +outputs/step_meshes/step_011.glb filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..12aeee20b64d51c2afe471aed850f6bca55072d6 --- /dev/null +++ b/.gitignore @@ -0,0 +1,207 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[codz] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. 
+*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py.cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# UV +# Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +#uv.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock +#poetry.toml + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python. 
+# https://pdm-project.org/en/latest/usage/project/#working-with-version-control +#pdm.lock +#pdm.toml +.pdm-python +.pdm-build/ + +# pixi +# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control. +#pixi.lock +# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one +# in the .venv directory. It is recommended not to include this directory in version control. +.pixi + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.envrc +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ + +# Abstra +# Abstra is an AI-powered process automation framework. +# Ignore directories containing user credentials, local state, and settings. +# Learn more at https://abstra.io/docs +.abstra/ + +# Visual Studio Code +# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore +# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore +# and can be added to the global gitignore or merged into this file. 
However, if you prefer, +# you could uncomment the following to ignore the entire vscode folder +# .vscode/ + +# Ruff stuff: +.ruff_cache/ + +# PyPI configuration file +.pypirc + +# Cursor +# Cursor is an AI-powered code editor. `.cursorignore` specifies files/directories to +# exclude from AI features like autocomplete and code analysis. Recommended for sensitive data +# refer to https://docs.cursor.com/context/ignore-files +.cursorignore +.cursorindexingignore + +# Marimo +marimo/_static/ +marimo/_lsp/ +__marimo__/ diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..0b9215af1c52939b19bf021cb9814cd37394ef45 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "o-voxel/third_party/eigen"] + path = o-voxel/third_party/eigen + url = https://gitlab.com/libeigen/eigen.git diff --git a/README.md b/README.md index 79cc5a3c4d3511df28e119cf6215e449a1eeb237..6439221b562205f6bf697da9411b308794f4ce9e 100644 --- a/README.md +++ b/README.md @@ -5,12 +5,10 @@ colorFrom: blue colorTo: purple sdk: gradio sdk_version: 6.1.0 -python_version: "3.10" app_file: app.py pinned: false license: mit short_description: Multi-view image to 3D generation -suggested_hardware: a100-large --- # TRELLIS.2 Multi-Image Conditioning Fork diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000000000000000000000000000000000000..fd4ce163a4d96de6e81c25d708118bcbfc80e40b --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,14 @@ + + +## Security + +Microsoft takes the security of our software products and services seriously, which +includes all source code repositories in our GitHub organizations. + +**Please do not report security vulnerabilities through public GitHub issues.** + +For security reporting information, locations, contact information, and policies, +please review the latest guidance for Microsoft repositories at +[https://aka.ms/SECURITY.md](https://aka.ms/SECURITY.md). 
+ + \ No newline at end of file diff --git a/app.py b/app.py index 594933e67701d4258f697e1c4799b85f37519ba3..f4feb3679cff39de6375ebfa657824ff327f1f2f 100644 --- a/app.py +++ b/app.py @@ -13,78 +13,17 @@ from datetime import datetime import shutil import cv2 from typing import * +import torch import numpy as np from PIL import Image import base64 import io import tempfile - -# Lazy imports - will be loaded when GPU is available -torch = None -SparseTensor = None -Trellis2ImageTo3DPipeline = None -EnvMap = None -render_utils = None -o_voxel = None - -# Global state - initialized on first GPU call -pipeline = None -envmap = None -_initialized = False - - -def _lazy_import(): - """Import GPU-dependent modules. Must be called from within a @spaces.GPU function.""" - global torch, SparseTensor, Trellis2ImageTo3DPipeline, EnvMap, render_utils, o_voxel - if torch is None: - import torch as _torch - torch = _torch - if SparseTensor is None: - from trellis2.modules.sparse import SparseTensor as _SparseTensor - SparseTensor = _SparseTensor - if Trellis2ImageTo3DPipeline is None: - from trellis2.pipelines import Trellis2ImageTo3DPipeline as _Trellis2ImageTo3DPipeline - Trellis2ImageTo3DPipeline = _Trellis2ImageTo3DPipeline - if EnvMap is None: - from trellis2.renderers import EnvMap as _EnvMap - EnvMap = _EnvMap - if render_utils is None: - from trellis2.utils import render_utils as _render_utils - render_utils = _render_utils - if o_voxel is None: - import o_voxel as _o_voxel - o_voxel = _o_voxel - - -def _initialize_pipeline(): - """Initialize the pipeline and environment maps. 
Must be called from within a @spaces.GPU function.""" - global pipeline, envmap, _initialized - if _initialized: - return - - _lazy_import() - - pipeline = Trellis2ImageTo3DPipeline.from_pretrained('microsoft/TRELLIS.2-4B') - pipeline.rembg_model = None - pipeline.low_vram = False - pipeline.cuda() - - envmap = { - 'forest': EnvMap(torch.tensor( - cv2.cvtColor(cv2.imread('assets/hdri/forest.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), - dtype=torch.float32, device='cuda' - )), - 'sunset': EnvMap(torch.tensor( - cv2.cvtColor(cv2.imread('assets/hdri/sunset.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), - dtype=torch.float32, device='cuda' - )), - 'courtyard': EnvMap(torch.tensor( - cv2.cvtColor(cv2.imread('assets/hdri/courtyard.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), - dtype=torch.float32, device='cuda' - )), - } - - _initialized = True +from trellis2.modules.sparse import SparseTensor +from trellis2.pipelines import Trellis2ImageTo3DPipeline +from trellis2.renderers import EnvMap +from trellis2.utils import render_utils +import o_voxel MAX_SEED = np.iinfo(np.int32).max @@ -103,30 +42,54 @@ DEFAULT_STEP = 3 css = """ -/* ColmapView Dark Theme */ -:root { - --body-background-fill: #0a0a0a !important; - --background-fill-primary: #0f0f0f !important; - --background-fill-secondary: #161616 !important; - --block-background-fill: #161616 !important; - --input-background-fill: #1a1a1a !important; - --body-text-color: #e8e8e8 !important; - --block-label-text-color: #8a8a8a !important; - --block-title-text-color: #e8e8e8 !important; - --border-color-primary: #2a2a2a !important; - --color-accent: #b8b8b8 !important; - --color-accent-soft: rgba(184, 184, 184, 0.15) !important; - --button-primary-background-fill: #b8b8b8 !important; - --button-primary-text-color: #0a0a0a !important; +/* Overwrite Gradio Default Style */ +.stepper-wrapper { + padding: 0; +} + +.stepper-container { + padding: 0; + align-items: center; +} + +.step-button { + flex-direction: row; +} + 
+.step-connector { + transform: none; +} + +.step-number { + width: 16px; + height: 16px; +} + +.step-label { + position: relative; + bottom: 0; +} + +.wrap.center.full { + inset: 0; + height: 100%; } -body { background: #0a0a0a !important; } -.gradio-container { background: #0f0f0f !important; } -.dark { background: #0f0f0f !important; } +.wrap.center.full.translucent { + background: var(--block-background-fill); +} + +.meta-text-center { + display: block !important; + position: absolute !important; + top: unset !important; + bottom: 0 !important; + right: 0 !important; + transform: unset !important; +} -/* Previewer (required for custom HTML viewer) */ +/* Previewer */ .previewer-container { - background: #0a0a0a; position: relative; font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif; width: 100%; @@ -177,6 +140,7 @@ body { background: #0a0a0a !important; } opacity: 100%; } +/* Row 1: Display Modes */ .previewer-container .mode-row { width: 100%; display: flex; @@ -202,6 +166,7 @@ body { background: #0a0a0a !important; } transform: scale(1.1); } +/* Row 2: Display Image */ .previewer-container .display-row { margin-bottom: 20px; min-height: 400px; @@ -222,6 +187,7 @@ body { background: #0a0a0a !important; } display: block; } +/* Row 3: Custom HTML Slider */ .previewer-container .slider-row { width: 100%; display: flex; @@ -259,6 +225,7 @@ body { background: #0a0a0a !important; } transform: scale(1.2); } +/* Overwrite Previewer Block Style */ .gradio-container .padded:has(.previewer-container) { padding: 0 !important; } @@ -288,9 +255,11 @@ head = """ } // 2. Hide ALL images + // We select all elements with class 'previewer-main-image' allImgs.forEach(img => img.classList.remove('visible')); // 3. 
Construct the specific ID for the current state + // Format: view-m{mode}-s{step} const targetId = 'view-m' + mode + '-s' + step; const targetImg = document.getElementById(targetId); @@ -320,10 +289,10 @@ head = """ """ -empty_html = """ +empty_html = f"""
- +
""" @@ -343,8 +312,7 @@ def start_session(req: gr.Request): def end_session(req: gr.Request): user_dir = os.path.join(TMP_DIR, str(req.session_hash)) - if os.path.exists(user_dir): - shutil.rmtree(user_dir) + shutil.rmtree(user_dir) def remove_background(input: Image.Image) -> Image.Image: @@ -357,7 +325,10 @@ def remove_background(input: Image.Image) -> Image.Image: def preprocess_image(input: Image.Image) -> Image.Image: - """Preprocess a single input image.""" + """ + Preprocess the input image. + """ + # if has alpha channel, use it directly; otherwise, remove background has_alpha = False if input.mode == 'RGBA': alpha = np.array(input)[:, :, 3] @@ -379,7 +350,7 @@ def preprocess_image(input: Image.Image) -> Image.Image: size = max(bbox[2] - bbox[0], bbox[3] - bbox[1]) size = int(size * 1) bbox = center[0] - size // 2, center[1] - size // 2, center[0] + size // 2, center[1] + size // 2 - output = output.crop(bbox) + output = output.crop(bbox) # type: ignore output = np.array(output).astype(np.float32) / 255 output = output[:, :, :3] * output[:, :, 3:4] output = Image.fromarray((output * 255).astype(np.uint8)) @@ -387,16 +358,17 @@ def preprocess_image(input: Image.Image) -> Image.Image: def preprocess_images(images: List[Tuple[Image.Image, str]]) -> List[Image.Image]: - """Preprocess a list of input images. Uses parallel processing.""" - if not images: - return [] - imgs = [img[0] if isinstance(img, tuple) else img for img in images] - with ThreadPoolExecutor(max_workers=min(4, len(imgs))) as executor: - processed_images = list(executor.map(preprocess_image, imgs)) + """ + Preprocess a list of input images for multi-image conditioning. + Uses parallel processing for faster background removal. 
+ """ + images = [image[0] for image in images] + with ThreadPoolExecutor(max_workers=min(4, len(images))) as executor: + processed_images = list(executor.map(preprocess_image, images)) return processed_images -def pack_state(latents): +def pack_state(latents: Tuple[SparseTensor, SparseTensor, int]) -> dict: shape_slat, tex_slat, res = latents return { 'shape_slat_feats': shape_slat.feats.cpu().numpy(), @@ -406,8 +378,7 @@ def pack_state(latents): } -def unpack_state(state: dict): - _lazy_import() +def unpack_state(state: dict) -> Tuple[SparseTensor, SparseTensor, int]: shape_slat = SparseTensor( feats=torch.from_numpy(state['shape_slat_feats']).cuda(), coords=torch.from_numpy(state['coords']).cuda(), @@ -417,32 +388,33 @@ def unpack_state(state: dict): def get_seed(randomize_seed: bool, seed: int) -> int: + """ + Get the random seed. + """ return np.random.randint(0, MAX_SEED) if randomize_seed else seed def prepare_multi_example() -> List[Image.Image]: - """Prepare multi-image examples as concatenated images for gr.Examples.""" - example_dir = "assets/example_multi_image" - if not os.path.exists(example_dir): - return [] - cases = list(set([f.split('_')[0] for f in os.listdir(example_dir) if '_' in f and f.endswith('.png')])) + """ + Prepare multi-image examples for the gallery. 
+ """ + multi_case = list(set([i.split('_')[0] for i in os.listdir("assets/example_multi_image")])) images = [] - for case in sorted(cases): - case_images = [] - for i in range(1, 10): - img_path = f'{example_dir}/{case}_{i}.png' - if os.path.exists(img_path): - img = Image.open(img_path) - W, H = img.size - img = img.resize((int(W / H * 512), 512)) - case_images.append(np.array(img)) - if case_images: - images.append(Image.fromarray(np.concatenate(case_images, axis=1))) + for case in multi_case: + _images = [] + for i in range(1, 4): + img = Image.open(f'assets/example_multi_image/{case}_{i}.png') + W, H = img.size + img = img.resize((int(W / H * 512), 512)) + _images.append(np.array(img)) + images.append(Image.fromarray(np.concatenate(_images, axis=1))) return images def split_image(image: Image.Image) -> List[Image.Image]: - """Split a concatenated multi-view image into separate images based on alpha.""" + """ + Split a concatenated image into multiple views. + """ image = np.array(image) alpha = image[..., 3] alpha = np.any(alpha > 0, axis=0) @@ -451,12 +423,12 @@ def split_image(image: Image.Image) -> List[Image.Image]: images = [] for s, e in zip(start_pos, end_pos): images.append(Image.fromarray(image[:, s:e+1])) - return [preprocess_image(img) for img in images] + return [preprocess_image(image) for image in images] @spaces.GPU(duration=120) def image_to_3d( - images: List[Tuple[Image.Image, str]], + image: Image.Image, seed: int, resolution: str, ss_guidance_strength: float, @@ -471,24 +443,16 @@ def image_to_3d( tex_slat_guidance_rescale: float, tex_slat_sampling_steps: int, tex_slat_rescale_t: float, - multiimage_algo: Literal["multidiffusion", "stochastic"], req: gr.Request, progress=gr.Progress(track_tqdm=True), + multiimages: List[Tuple[Image.Image, str]] = None, + is_multiimage: bool = False, + multiimage_algo: Literal["multidiffusion", "stochastic"] = "stochastic", ) -> str: - # Initialize pipeline on first call - _initialize_pipeline() - - # 
Extract images from gallery format - if not images: - raise gr.Error("Please upload at least one image") - - imgs = [img[0] if isinstance(img, tuple) else img for img in images] - # --- Sampling --- - if len(imgs) == 1: - # Single image mode + if not is_multiimage: outputs, latents = pipeline.run( - imgs[0], + image, seed=seed, preprocess_image=False, sparse_structure_sampler_params={ @@ -517,9 +481,8 @@ def image_to_3d( return_latent=True, ) else: - # Multi-image mode outputs, latents = pipeline.run_multi_image( - imgs, + [image[0] for image in multiimages], seed=seed, preprocess_image=False, sparse_structure_sampler_params={ @@ -548,44 +511,85 @@ def image_to_3d( return_latent=True, mode=multiimage_algo, ) - mesh = outputs[0] - mesh.simplify(16777216) - render_images = render_utils.render_snapshot(mesh, resolution=1024, r=2, fov=36, nviews=STEPS, envmap=envmap) + mesh.simplify(16777216) # nvdiffrast limit + images = render_utils.render_snapshot(mesh, resolution=1024, r=2, fov=36, nviews=STEPS, envmap=envmap) state = pack_state(latents) torch.cuda.empty_cache() # --- HTML Construction --- + # The Stack of 48 Images - encode in parallel for speed + def encode_preview_image(args): + m_idx, s_idx, render_key = args + img_base64 = image_to_base64(Image.fromarray(images[render_key][s_idx])) + return (m_idx, s_idx, img_base64) + + encode_tasks = [ + (m_idx, s_idx, mode['render_key']) + for m_idx, mode in enumerate(MODES) + for s_idx in range(STEPS) + ] + + with ThreadPoolExecutor(max_workers=8) as executor: + encoded_results = list(executor.map(encode_preview_image, encode_tasks)) + + # Build HTML from encoded results + encoded_map = {(m, s): b64 for m, s, b64 in encoded_results} images_html = "" for m_idx, mode in enumerate(MODES): for s_idx in range(STEPS): unique_id = f"view-m{m_idx}-s{s_idx}" is_visible = (m_idx == DEFAULT_MODE and s_idx == DEFAULT_STEP) vis_class = "visible" if is_visible else "" - img_base64 = 
image_to_base64(Image.fromarray(render_images[mode['render_key']][s_idx])) - images_html += f'' + img_base64 = encoded_map[(m_idx, s_idx)] + + images_html += f""" + + """ + # Button Row HTML btns_html = "" for idx, mode in enumerate(MODES): active_class = "active" if idx == DEFAULT_MODE else "" - btns_html += f'' - + # Note: onclick calls the JS function defined in Head + btns_html += f""" + + """ + + # Assemble the full component full_html = f"""
-
Tips
+
💡Tips
-

Render Mode - Click buttons to switch render modes.

-

View Angle - Drag slider to change view.

+

● Render Mode - Click on the circular buttons to switch between different render modes.

+

● View Angle - Drag the slider to change the view angle.

-
{images_html}
-
{btns_html}
+ + +
+ {images_html} +
+ + +
+ {btns_html} +
+ +
""" + return state, full_html @@ -597,12 +601,21 @@ def extract_glb( req: gr.Request, progress=gr.Progress(track_tqdm=True), ) -> Tuple[str, str]: - _initialize_pipeline() + """ + Extract a GLB file from the 3D model. + + Args: + state (dict): The state of the generated 3D model. + decimation_target (int): The target face count for decimation. + texture_size (int): The texture resolution. + Returns: + str: The path to the extracted GLB file. + """ user_dir = os.path.join(TMP_DIR, str(req.session_hash)) shape_slat, tex_slat, res = unpack_state(state) mesh = pipeline.decode_latent(shape_slat, tex_slat, res)[0] - mesh.simplify(16777216) + mesh.simplify(16777216) # nvdiffrast limit glb = o_voxel.postprocess.to_glb( vertices=mesh.vertices, faces=mesh.faces, @@ -629,22 +642,22 @@ def extract_glb( with gr.Blocks(delete_cache=(600, 600)) as demo: gr.Markdown(""" - ## Multi-View Image to 3D Asset with [TRELLIS.2](https://microsoft.github.io/TRELLIS.2) - * Upload one or more images and click Generate to create a 3D asset. - * Multiple views from different angles will produce better results. - * Click Extract GLB to export and download the generated GLB file. + ## Image to 3D Asset with [TRELLIS.2](https://microsoft.github.io/TRELLIS.2) + * Upload an image (preferably with an alpha-masked foreground object) and click Generate to create a 3D asset. + * Click Extract GLB to export and download the generated GLB file if you're satisfied with the result. Otherwise, try another time. 
""") with gr.Row(): with gr.Column(scale=1, min_width=360): - image_prompt = gr.Gallery( - label="Input Images", - format="png", - type="pil", - height=400, - columns=3, - object_fit="contain" - ) + with gr.Tabs() as input_tabs: + with gr.Tab(label="Single Image", id=0) as single_image_input_tab: + image_prompt = gr.Image(label="Image Prompt", format="png", image_mode="RGBA", type="pil", height=400) + with gr.Tab(label="Multiple Images", id=1) as multiimage_input_tab: + multiimage_prompt = gr.Gallery(label="Image Prompt", format="png", type="pil", height=400, columns=3) + gr.Markdown(""" + Input different views of the object in separate images. + *NOTE: this is an experimental algorithm without training a specialized model. It may not produce the best results for all images, especially those having different poses or inconsistent details.* + """) resolution = gr.Radio(["512", "1024", "1536"], label="Resolution", value="1024") seed = gr.Slider(0, MAX_SEED, label="Seed", value=0, step=1) @@ -676,44 +689,73 @@ with gr.Blocks(delete_cache=(600, 600)) as demo: multiimage_algo = gr.Radio(["stochastic", "multidiffusion"], label="Multi-image Algorithm", value="stochastic") with gr.Column(scale=10): - with gr.Walkthrough(selected=0) as walkthrough: - with gr.Step("Preview", id=0): + with gr.Tabs() as tabs: + with gr.Tab("Preview", id=0): preview_output = gr.HTML(empty_html, label="3D Asset Preview", show_label=True, container=True) extract_btn = gr.Button("Extract GLB") - with gr.Step("Extract", id=1): + with gr.Tab("Extract", id=1): glb_output = gr.Model3D(label="Extracted GLB", height=724, show_label=True, display_mode="solid", clear_color=(0.25, 0.25, 0.25, 1.0)) download_btn = gr.DownloadButton(label="Download GLB") - gr.Markdown("*GLB extraction may take 30+ seconds.*") + gr.Markdown("*We are actively working on improving the speed of GLB extraction. 
Currently, it may take half a minute or more and face count is limited.*") - with gr.Column(scale=1, min_width=200): - # Hidden image for examples input - example_image = gr.Image(visible=False, type="pil", image_mode="RGBA") - gr.Markdown("### Multi-View Examples") + with gr.Column(scale=1, min_width=172) as single_image_example: examples = gr.Examples( + examples=[ + f'assets/example_image/{image}' + for image in os.listdir("assets/example_image") + ], + inputs=[image_prompt], + fn=preprocess_image, + outputs=[image_prompt], + run_on_click=True, + examples_per_page=18, + ) + + with gr.Column(visible=True) as multiimage_example: + examples_multi = gr.Examples( examples=prepare_multi_example(), - inputs=[example_image], + label="Multi Image Examples", + inputs=[image_prompt], fn=split_image, - outputs=[image_prompt], + outputs=[multiimage_prompt], run_on_click=True, - examples_per_page=12, + examples_per_page=8, ) + is_multiimage = gr.State(False) output_buf = gr.State() + # Handlers demo.load(start_session) demo.unload(end_session) + single_image_input_tab.select( + lambda: (False, gr.update(visible=True), gr.update(visible=True)), + outputs=[is_multiimage, single_image_example, multiimage_example] + ) + multiimage_input_tab.select( + lambda: (True, gr.update(visible=True), gr.update(visible=True)), + outputs=[is_multiimage, single_image_example, multiimage_example] + ) + image_prompt.upload( - preprocess_images, + preprocess_image, inputs=[image_prompt], outputs=[image_prompt], ) + multiimage_prompt.upload( + preprocess_images, + inputs=[multiimage_prompt], + outputs=[multiimage_prompt], + ) generate_btn.click( - get_seed, inputs=[randomize_seed, seed], outputs=[seed], + get_seed, + inputs=[randomize_seed, seed], + outputs=[seed], ).then( - lambda: gr.Walkthrough(selected=0), outputs=walkthrough + lambda: gr.Tabs(selected=0), outputs=tabs ).then( image_to_3d, inputs=[ @@ -721,13 +763,13 @@ with gr.Blocks(delete_cache=(600, 600)) as demo: ss_guidance_strength, 
ss_guidance_rescale, ss_sampling_steps, ss_rescale_t, shape_slat_guidance_strength, shape_slat_guidance_rescale, shape_slat_sampling_steps, shape_slat_rescale_t, tex_slat_guidance_strength, tex_slat_guidance_rescale, tex_slat_sampling_steps, tex_slat_rescale_t, - multiimage_algo + multiimage_prompt, is_multiimage, multiimage_algo ], outputs=[output_buf, preview_output], ) extract_btn.click( - lambda: gr.Walkthrough(selected=1), outputs=walkthrough + lambda: gr.Tabs(selected=1), outputs=tabs ).then( extract_glb, inputs=[output_buf, decimation_target, texture_size], @@ -735,13 +777,35 @@ with gr.Blocks(delete_cache=(600, 600)) as demo: ) +# Launch the Gradio app if __name__ == "__main__": os.makedirs(TMP_DIR, exist_ok=True) + # Construct ui components + btn_img_base64_strs = {} for i in range(len(MODES)): icon = Image.open(MODES[i]['icon']) MODES[i]['icon_base64'] = image_to_base64(icon) rmbg_client = Client("briaai/BRIA-RMBG-2.0") + pipeline = Trellis2ImageTo3DPipeline.from_pretrained('microsoft/TRELLIS.2-4B') + pipeline.rembg_model = None + pipeline.low_vram = False + pipeline.cuda() + + envmap = { + 'forest': EnvMap(torch.tensor( + cv2.cvtColor(cv2.imread('assets/hdri/forest.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), + dtype=torch.float32, device='cuda' + )), + 'sunset': EnvMap(torch.tensor( + cv2.cvtColor(cv2.imread('assets/hdri/sunset.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), + dtype=torch.float32, device='cuda' + )), + 'courtyard': EnvMap(torch.tensor( + cv2.cvtColor(cv2.imread('assets/hdri/courtyard.exr', cv2.IMREAD_UNCHANGED), cv2.COLOR_BGR2RGB), + dtype=torch.float32, device='cuda' + )), + } demo.launch(css=css, head=head) diff --git a/app_texturing.py b/app_texturing.py new file mode 100644 index 0000000000000000000000000000000000000000..a4e144eea3e05420a98854742c121bee9c3cb883 --- /dev/null +++ b/app_texturing.py @@ -0,0 +1,151 @@ +import gradio as gr + +import os +os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True" +from 
MAX_SEED = np.iinfo(np.int32).max
TMP_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'tmp')


def start_session(req: gr.Request):
    """
    Create the scratch directory for a session.

    Args:
        req (gr.Request): The request whose ``session_hash`` names the directory
            created under ``TMP_DIR``.
    """
    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
    os.makedirs(user_dir, exist_ok=True)


def end_session(req: gr.Request):
    """
    Remove the scratch directory of a session.

    Args:
        req (gr.Request): The request whose ``session_hash`` names the directory
            to delete under ``TMP_DIR``.
    """
    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
    # Best-effort cleanup: the directory may never have been created or may
    # already be gone, and a failed cleanup must not raise out of the handler.
    shutil.rmtree(user_dir, ignore_errors=True)


def preprocess_image(image: Image.Image) -> Image.Image:
    """
    Preprocess the input image.

    Args:
        image (Image.Image): The input image.

    Returns:
        Image.Image: The preprocessed image, as returned by
        ``pipeline.preprocess_image``.
    """
    return pipeline.preprocess_image(image)


def get_seed(randomize_seed: bool, seed: int) -> int:
    """
    Get the random seed.

    Args:
        randomize_seed (bool): When True, draw a fresh seed from ``[0, MAX_SEED)``.
        seed (int): The seed returned unchanged when ``randomize_seed`` is False.

    Returns:
        int: The seed to use.
    """
    if randomize_seed:
        # np.random.randint returns a NumPy scalar; convert so the declared
        # return type (a plain int) actually holds.
        return int(np.random.randint(0, MAX_SEED))
    return seed


def shapeimage_to_tex(
    mesh_file: str,
    image: Image.Image,
    seed: int,
    resolution: str,
    texture_size: int,
    tex_slat_guidance_strength: float,
    tex_slat_guidance_rescale: float,
    tex_slat_sampling_steps: int,
    tex_slat_rescale_t: float,
    req: gr.Request,
    progress=gr.Progress(track_tqdm=True),
) -> Tuple[str, str]:
    """
    Texture an uploaded mesh from a reference image and export it as a GLB file.

    Args:
        mesh_file (str): Path of the mesh to load with ``trimesh.load``.
        image (Image.Image): The reference image. It is passed to the pipeline with
            ``preprocess_image=False``, so it is used as given.
        seed (int): The random seed forwarded to the pipeline.
        resolution (str): The resolution as a numeric string; converted with ``int``.
        texture_size (int): The texture size forwarded to the pipeline.
        tex_slat_guidance_strength (float): Sampler ``guidance_strength``.
        tex_slat_guidance_rescale (float): Sampler ``guidance_rescale``.
        tex_slat_sampling_steps (int): Sampler ``steps``.
        tex_slat_rescale_t (float): Sampler ``rescale_t``.
        req (gr.Request): The request whose ``session_hash`` selects the output directory.
        progress: Gradio progress tracker; not referenced in the body.

    Returns:
        Tuple[str, str]: The path of the exported GLB, twice -- one value per
        output component wired to this handler.
    """
    mesh = trimesh.load(mesh_file)
    if isinstance(mesh, trimesh.Scene):
        # The pipeline is given a single mesh, so flatten a multi-geometry scene.
        mesh = mesh.to_mesh()

    sampler_params = {
        "steps": tex_slat_sampling_steps,
        "guidance_strength": tex_slat_guidance_strength,
        "guidance_rescale": tex_slat_guidance_rescale,
        "rescale_t": tex_slat_rescale_t,
    }
    output = pipeline.run(
        mesh,
        image,
        seed=seed,
        preprocess_image=False,
        tex_slat_sampler_params=sampler_params,
        resolution=int(resolution),
        texture_size=texture_size,
    )

    # Millisecond-resolution timestamp keeps successive exports of one session apart.
    now = datetime.now()
    timestamp = now.strftime("%Y-%m-%dT%H%M%S") + f".{now.microsecond // 1000:03d}"
    user_dir = os.path.join(TMP_DIR, str(req.session_hash))
    os.makedirs(user_dir, exist_ok=True)
    glb_path = os.path.join(user_dir, f'sample_{timestamp}.glb')
    output.export(glb_path, extension_webp=True)
    torch.cuda.empty_cache()
    return glb_path, glb_path
outputs=[seed], + ).then( + shapeimage_to_tex, + inputs=[ + mesh_file, image_prompt, seed, resolution, texture_size, + tex_slat_guidance_strength, tex_slat_guidance_rescale, tex_slat_sampling_steps, tex_slat_rescale_t, + ], + outputs=[glb_output, download_btn], + ) + + +# Launch the Gradio app +if __name__ == "__main__": + os.makedirs(TMP_DIR, exist_ok=True) + + pipeline = Trellis2TexturingPipeline.from_pretrained('microsoft/TRELLIS.2-4B', config_file="texturing_pipeline.json") + pipeline.cuda() + + demo.launch() diff --git a/o-voxel/README.md b/o-voxel/README.md new file mode 100644 index 0000000000000000000000000000000000000000..0ced733c3b168aea690abb36ef19a96e3d68b6d9 --- /dev/null +++ b/o-voxel/README.md @@ -0,0 +1,174 @@ +# O-Voxel: A Native 3D Representation + +**O-Voxel** is a sparse, voxel-based native 3D representation designed for high-quality 3D generation and reconstruction. Unlike traditional methods that rely on fields (e.g., Occupancy fields, SDFs), O-Voxel utilizes a **Flexible Dual Grid** formulation to robustly represent surfaces with arbitrary topology (including non-manifold and open surfaces) and **volumetric surface properties** such as Physically-Based Rendering (PBR) material attributes. + +This library provides an efficient implementation for the instant bidirectional conversion between Meshes and O-Voxels, along with tools for sparse voxel compression, serialization, and rendering. + +![Overview](assets/overview.webp) + +## Key Features + +- **🧱 Flexible Dual Grid**: A geometry representation that solves a enhanced QEF (Quadratic Error Function) to accurately capture sharp features and open boundaries without requiring watertight meshes. +- **🎨 Volumetric PBR Attributes**: Native support for physically-based rendering properties (Base Color, Metallic, Roughness, Opacity) aligned with the sparse voxel grid. 
+- **⚡ Instant Bidirectional Conversion**: Rapid `Mesh <-> O-Voxel` conversion without expensive SDF evaluation, flood-filling, or iterative optimization. +- **💾 Efficient Compression**: Supports custom `.vxz` format for compact storage of sparse voxel structures using Z-order/Hilbert curve encoding. +- **🛠️ Production Ready**: Tools to export converted assets directly to `.glb` with UV unwrapping and texture baking. + +## Installation + +```bash +git clone -b main https://github.com/microsoft/TRELLIS.2.git --recursive +pip install TRELLIS.2/o-voxel --no-build-isolation +``` + +## Quick Start + +> See also the [examples](examples) directory for more detailed usage. + +### 1. Convert Mesh to O-Voxel [[link]](examples/mesh2ovox.py) +Convert a standard 3D mesh (with textures) into the O-Voxel representation. + +```python +asset = trimesh.load("path/to/mesh.glb") + +# 1. Geometry Voxelization (Flexible Dual Grid) +# Returns: occupied indices, dual vertices (QEF solution), and edge-intersection flags +mesh = asset.to_mesh() +vertices = torch.from_numpy(mesh.vertices).float() +faces = torch.from_numpy(mesh.faces).long() +voxel_indices, dual_vertices, intersected = o_voxel.convert.mesh_to_flexible_dual_grid( + vertices, faces, + grid_size=RES, # Resolution + aabb=[[-0.5,-0.5,-0.5],[0.5,0.5,0.5]], # Axis-aligned bounding box + face_weight=1.0, # Face term weight in QEF + boundary_weight=0.2, # Boundary term weight in QEF + regularization_weight=1e-2, # Regularization term weight in QEF + timing=True +) +## sort to ensure align between geometry and material voxelization +vid = o_voxel.serialize.encode_seq(voxel_indices) +mapping = torch.argsort(vid) +voxel_indices = voxel_indices[mapping] +dual_vertices = dual_vertices[mapping] +intersected = intersected[mapping] + +# 2. Material Voxelization (Volumetric Attributes) +# Returns: dict containing 'base_color', 'metallic', 'roughness', etc. 
+voxel_indices_mat, attributes = o_voxel.convert.textured_mesh_to_volumetric_attr( + asset, + grid_size=RES, + aabb=[[-0.5,-0.5,-0.5],[0.5,0.5,0.5]], + timing=True +) +## sort to ensure align between geometry and material voxelization +vid_mat = o_voxel.serialize.encode_seq(voxel_indices_mat) +mapping_mat = torch.argsort(vid_mat) +attributes = {k: v[mapping_mat] for k, v in attributes.items()} + +# Save to compressed .vxz format +## packing +dual_vertices = dual_vertices * RES - voxel_indices +dual_vertices = (torch.clamp(dual_vertices, 0, 1) * 255).type(torch.uint8) +intersected = (intersected[:, 0:1] + 2 * intersected[:, 1:2] + 4 * intersected[:, 2:3]).type(torch.uint8) +attributes['dual_vertices'] = dual_vertices +attributes['intersected'] = intersected +o_voxel.io.write("ovoxel_helmet.vxz", voxel_indices, attributes) +``` + +### 2. Recover Mesh from O-Voxel [[link]](examples/ovox2mesh.py) +Reconstruct the surface mesh from the sparse voxel data. + +```python +# Load data +coords, data = o_voxel.io.read("path/to/ovoxel.vxz") +dual_vertices = data['dual_vertices'] +intersected = data['intersected'] +base_color = data['base_color'] +## ... other attributes omitted for brevity + +# Depack +dual_vertices = dual_vertices / 255 +intersected = torch.cat([ + intersected % 2, + intersected // 2 % 2, + intersected // 4 % 2, +], dim=-1).bool() + +# Extract Mesh +# O-Voxel connects dual vertices to form quads, optionally splitting them +# based on geometric features. +rec_verts, rec_faces = o_voxel.convert.flexible_dual_grid_to_mesh( + coords.cuda(), + dual_vertices.cuda(), + intersected.cuda(), + split_weight=None, # Auto-split based on min angle if None + grid_size=RES, + aabb=[[-0.5,-0.5,-0.5],[0.5,0.5,0.5]], +) +``` + +### 3. Export to GLB [[link]](examples/ovox2glb.py) +For visualization in standard 3D viewers, you can clean, UV-unwrap, and bake the volumetric attributes into textures. 
+ +```python +# Assuming you have the reconstructed verts/faces and volume attributes +mesh = o_voxel.postprocess.to_glb( + vertices=rec_verts, + faces=rec_faces, + attr_volume=attr_tensor, # Concatenated attributes + coords=coords, + attr_layout={'base_color': slice(0,3), 'metallic': slice(3,4), ...}, + grid_size=RES, + aabb=[[-0.5,-0.5,-0.5],[0.5,0.5,0.5]], + decimation_target=100000, + texture_size=2048, + verbose=True, +) +mesh.export("rec_helmet.glb") +``` + +### 4. Voxel Rendering [[link]](examples/render_ovox.py) +Render the voxel representation directly. + +```python +# Load data +coords, data = o_voxel.io.read("ovoxel_helmet.vxz") +position = (coords / RES - 0.5).cuda() +base_color = (data['base_color'] / 255).cuda() + +# Render +renderer = o_voxel.rasterize.VoxelRenderer( + rendering_options={"resolution": 512, "ssaa": 2} +) +output = renderer.render( + position=position, # Voxel centers + attrs=base_color, # Color/Opacity etc. + voxel_size=1.0/RES, + extrinsics=extr, + intrinsics=intr +) +# output.attr contains the rendered image (C, H, W) +``` + +## API Overview + +### `o_voxel.convert` +Core algorithms for the conversion between meshes and O-Voxels. +* `mesh_to_flexible_dual_grid`: Determines the active sparse voxels and solves the QEF to determine dual vertex positions within voxels based on mesh-voxel grid intersections. +* `flexible_dual_grid_to_mesh`: Reconnects dual vertices to form a surface. +* `textured_mesh_to_volumetric_attr`: Samples texture maps into voxel space. + +### `o_voxel.io` +Handles sparse voxel file I/O operations. +* **Formats**: `.npz` (NumPy), `.ply` (Point Cloud), `.vxz` (Custom compressed, recommended). +* **Functions**: `read()`, `write()`. + +### `o_voxel.serialize` +Utilities for spatial hashing and ordering. +* `encode_seq` / `decode_seq`: Converts 3D coordinates to/from Morton codes (Z-order) or Hilbert curves for efficient storage and processing. 
+ +### `o_voxel.rasterize` +* `VoxelRenderer`: A lightweight renderer for sparse voxel visualization during training. + +### `o_voxel.postprocess` +* `to_glb`: A comprehensive pipeline for mesh cleaning, remeshing, UV unwrapping, and texture baking. diff --git a/o-voxel/assets/overview.webp b/o-voxel/assets/overview.webp new file mode 100644 index 0000000000000000000000000000000000000000..5317e1364e4f0d497369cca7d00d4698bc339903 --- /dev/null +++ b/o-voxel/assets/overview.webp @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba51a74520803e36ec5be8e1aab1c71fea8a8df78a0502f0736e338ff4e97f93 +size 391770 diff --git a/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/__init__.py b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..55ffd986b47551517fbf1d1538b40f77ec5ee8f8 --- /dev/null +++ b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/__init__.py @@ -0,0 +1,7 @@ +from . import ( + convert, + io, + postprocess, + rasterize, + serialize +) \ No newline at end of file diff --git a/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/convert/__init__.py b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/convert/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..25755f9b06a2cf37856f3be043e3928a5e23510c --- /dev/null +++ b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/convert/__init__.py @@ -0,0 +1,2 @@ +from .flexible_dual_grid import * +from .volumetic_attr import * \ No newline at end of file diff --git a/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/convert/flexible_dual_grid.py b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/convert/flexible_dual_grid.py new file mode 100644 index 0000000000000000000000000000000000000000..51b8b0552fe697e95f7496a370a56c538c8abd10 --- /dev/null +++ b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/convert/flexible_dual_grid.py @@ -0,0 +1,283 @@ +from typing import * +import numpy as np +import torch +from .. 
import _C + +__all__ = [ + "mesh_to_flexible_dual_grid", + "flexible_dual_grid_to_mesh", +] + + +def _init_hashmap(grid_size, capacity, device): + VOL = (grid_size[0] * grid_size[1] * grid_size[2]).item() + + # If the number of elements in the tensor is less than 2^32, use uint32 as the hashmap type, otherwise use uint64. + if VOL < 2**32: + hashmap_keys = torch.full((capacity,), torch.iinfo(torch.uint32).max, dtype=torch.uint32, device=device) + elif VOL < 2**64: + hashmap_keys = torch.full((capacity,), torch.iinfo(torch.uint64).max, dtype=torch.uint64, device=device) + else: + raise ValueError(f"The spatial size is too large to fit in a hashmap. Get volumn {VOL} > 2^64.") + + hashmap_vals = torch.empty((capacity,), dtype=torch.uint32, device=device) + + return hashmap_keys, hashmap_vals + + +@torch.no_grad() +def mesh_to_flexible_dual_grid( + vertices: torch.Tensor, + faces: torch.Tensor, + voxel_size: Union[float, list, tuple, np.ndarray, torch.Tensor] = None, + grid_size: Union[int, list, tuple, np.ndarray, torch.Tensor] = None, + aabb: Union[list, tuple, np.ndarray, torch.Tensor] = None, + face_weight: float = 1.0, + boundary_weight: float = 1.0, + regularization_weight: float = 0.1, + timing: bool = False, +) -> Union[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Voxelize a mesh into a sparse voxel grid. + + Args: + vertices (torch.Tensor): The vertices of the mesh. + faces (torch.Tensor): The faces of the mesh. + voxel_size (float, list, tuple, np.ndarray, torch.Tensor): The size of each voxel. + grid_size (int, list, tuple, np.ndarray, torch.Tensor): The size of the grid. + NOTE: One of voxel_size and grid_size must be provided. + aabb (list, tuple, np.ndarray, torch.Tensor): The axis-aligned bounding box of the mesh. + If not provided, it will be computed automatically. + face_weight (float): The weight of the face term in the QEF when solving the dual vertices. 
+ boundary_weight (float): The weight of the boundary term in the QEF when solving the dual vertices. + regularization_weight (float): The weight of the regularization term in the QEF when solving the dual vertices. + timing (bool): Whether to time the voxelization process. + + Returns: + torch.Tensor: The indices of the voxels that are occupied by the mesh. + The shape of the tensor is (N, 3), where N is the number of occupied voxels. + torch.Tensor: The dual vertices of the mesh. + torch.Tensor: The intersected flag of each voxel. + """ + + # Load mesh + vertices = vertices.float() + faces = faces.int() + + # Voxelize settings + assert voxel_size is not None or grid_size is not None, "Either voxel_size or grid_size must be provided" + + if voxel_size is not None: + if isinstance(voxel_size, float): + voxel_size = [voxel_size, voxel_size, voxel_size] + if isinstance(voxel_size, (list, tuple)): + voxel_size = np.array(voxel_size) + if isinstance(voxel_size, np.ndarray): + voxel_size = torch.tensor(voxel_size, dtype=torch.float32) + assert isinstance(voxel_size, torch.Tensor), f"voxel_size must be a float, list, tuple, np.ndarray, or torch.Tensor, but got {type(voxel_size)}" + assert voxel_size.dim() == 1, f"voxel_size must be a 1D tensor, but got {voxel_size.shape}" + assert voxel_size.size(0) == 3, f"voxel_size must have 3 elements, but got {voxel_size.size(0)}" + + if grid_size is not None: + if isinstance(grid_size, int): + grid_size = [grid_size, grid_size, grid_size] + if isinstance(grid_size, (list, tuple)): + grid_size = np.array(grid_size) + if isinstance(grid_size, np.ndarray): + grid_size = torch.tensor(grid_size, dtype=torch.int32) + assert isinstance(grid_size, torch.Tensor), f"grid_size must be an int, list, tuple, np.ndarray, or torch.Tensor, but got {type(grid_size)}" + assert grid_size.dim() == 1, f"grid_size must be a 1D tensor, but got {grid_size.shape}" + assert grid_size.size(0) == 3, f"grid_size must have 3 elements, but got 
def flexible_dual_grid_to_mesh(
    coords: torch.Tensor,
    dual_vertices: torch.Tensor,
    intersected_flag: torch.Tensor,
    split_weight: Union[torch.Tensor, None],
    aabb: Union[list, tuple, np.ndarray, torch.Tensor],
    voxel_size: Union[float, list, tuple, np.ndarray, torch.Tensor] = None,
    grid_size: Union[int, list, tuple, np.ndarray, torch.Tensor] = None,
    train: bool = False,
):
    """
    Extract mesh from sparse voxel structures using flexible dual grid.

    For every voxel edge selected by ``intersected_flag``, the four voxels around
    that edge are looked up. When all four are present in ``coords``, their dual
    vertices are joined into a quad, which is then triangulated.

    Args:
        coords (torch.Tensor): The coordinates of the voxels, shape (N, 3).
        dual_vertices (torch.Tensor): The dual vertices, shape (N, 3). They are added
            to ``coords`` and then scaled by the voxel size.
        intersected_flag (torch.Tensor): The intersected flag, a boolean mask of shape
            (N, 3) with one entry per axis (x, y, z) of each voxel.
        split_weight (torch.Tensor): The split weight, indexed by voxel. If None, the
            quad diagonal is chosen by comparing, for both candidate splits, how well
            the normals of the two resulting triangles agree.
            Presumably shape (N, 1) -- TODO confirm against callers.
        aabb (list, tuple, np.ndarray, torch.Tensor): The axis-aligned bounding box of the mesh.
        voxel_size (float, list, tuple, np.ndarray, torch.Tensor): The size of each voxel.
        grid_size (int, list, tuple, np.ndarray, torch.Tensor): The size of the grid.
            NOTE: One of voxel_size and grid_size must be provided.
        train (bool): Whether to use training mode. In training mode every quad gets
            one extra vertex (a split-weight-weighted blend of the midpoints of its
            two diagonals) and is fanned into four triangles; ``split_weight`` is
            required.

    Returns:
        vertices (torch.Tensor): The vertices of the mesh, shape (N, 3), or
            (N + L, 3) in training mode, where L is the number of quads.
        faces (torch.Tensor): The faces of the mesh, shape (T, 3).
    """
    device = coords.device
    this = flexible_dual_grid_to_mesh

    # Static lookup tables, built once and cached on the function object.
    if not hasattr(this, "edge_neighbor_voxel_offset"):
        this.edge_neighbor_voxel_offset = torch.tensor([
            [[0, 0, 0], [0, 0, 1], [0, 1, 1], [0, 1, 0]],     # x-axis
            [[0, 0, 0], [1, 0, 0], [1, 0, 1], [0, 0, 1]],     # y-axis
            [[0, 0, 0], [0, 1, 0], [1, 1, 0], [1, 0, 0]],     # z-axis
        ], dtype=torch.int, device=device).unsqueeze(0)
    if not hasattr(this, "quad_split_1"):
        this.quad_split_1 = torch.tensor([0, 1, 2, 0, 2, 3], dtype=torch.long, device=device, requires_grad=False)
    if not hasattr(this, "quad_split_2"):
        this.quad_split_2 = torch.tensor([0, 1, 3, 3, 1, 2], dtype=torch.long, device=device, requires_grad=False)
    if not hasattr(this, "quad_split_train"):
        this.quad_split_train = torch.tensor([0, 1, 4, 1, 2, 4, 2, 3, 4, 3, 0, 4], dtype=torch.long, device=device, requires_grad=False)

    # The cache keeps the device of the first call. Moving here is a no-op when the
    # devices already match and avoids a device mismatch when a later call uses
    # tensors on a different device.
    edge_neighbor_voxel_offset = this.edge_neighbor_voxel_offset.to(device)
    quad_split_1 = this.quad_split_1.to(device)
    quad_split_2 = this.quad_split_2.to(device)
    quad_split_train = this.quad_split_train.to(device)

    # AABB
    if isinstance(aabb, (list, tuple)):
        aabb = np.array(aabb)
    if isinstance(aabb, np.ndarray):
        aabb = torch.tensor(aabb, dtype=torch.float32, device=device)
    assert isinstance(aabb, torch.Tensor), f"aabb must be a list, tuple, np.ndarray, or torch.Tensor, but got {type(aabb)}"
    assert aabb.dim() == 2, f"aabb must be a 2D tensor, but got {aabb.shape}"
    assert aabb.size(0) == 2, f"aabb must have 2 rows, but got {aabb.size(0)}"
    assert aabb.size(1) == 3, f"aabb must have 3 columns, but got {aabb.size(1)}"

    # Voxel size / grid size: whichever one is given determines the other.
    if voxel_size is not None:
        if isinstance(voxel_size, float):
            voxel_size = [voxel_size, voxel_size, voxel_size]
        if isinstance(voxel_size, (list, tuple)):
            voxel_size = np.array(voxel_size)
        if isinstance(voxel_size, np.ndarray):
            voxel_size = torch.tensor(voxel_size, dtype=torch.float32, device=device)
        grid_size = ((aabb[1] - aabb[0]) / voxel_size).round().int()
    else:
        assert grid_size is not None, "Either voxel_size or grid_size must be provided"
        if isinstance(grid_size, int):
            grid_size = [grid_size, grid_size, grid_size]
        if isinstance(grid_size, (list, tuple)):
            grid_size = np.array(grid_size)
        if isinstance(grid_size, np.ndarray):
            grid_size = torch.tensor(grid_size, dtype=torch.int32, device=device)
        voxel_size = (aabb[1] - aabb[0]) / grid_size
    assert isinstance(voxel_size, torch.Tensor), f"voxel_size must be a float, list, tuple, np.ndarray, or torch.Tensor, but got {type(voxel_size)}"
    assert voxel_size.dim() == 1, f"voxel_size must be a 1D tensor, but got {voxel_size.shape}"
    assert voxel_size.size(0) == 3, f"voxel_size must have 3 elements, but got {voxel_size.size(0)}"
    assert isinstance(grid_size, torch.Tensor), f"grid_size must be an int, list, tuple, np.ndarray, or torch.Tensor, but got {type(grid_size)}"
    assert grid_size.dim() == 1, f"grid_size must be a 1D tensor, but got {grid_size.shape}"
    assert grid_size.size(0) == 3, f"grid_size must have 3 elements, but got {grid_size.size(0)}"

    N = dual_vertices.shape[0]

    # Store active voxels into hashmap (the value of each entry is the voxel's row index)
    hashmap = _init_hashmap(grid_size, 2 * N, device=device)
    _C.hashmap_insert_3d_idx_as_val_cuda(*hashmap, torch.cat([torch.zeros_like(coords[:, :1]), coords], dim=-1), *grid_size.tolist())

    # Find connected voxels
    edge_neighbor_voxel = coords.reshape(N, 1, 1, 3) + edge_neighbor_voxel_offset      # (N, 3, 4, 3)
    connected_voxel = edge_neighbor_voxel[intersected_flag]                            # (M, 4, 3)
    M = connected_voxel.shape[0]
    connected_voxel_hash_key = torch.cat([
        torch.zeros((M * 4, 1), dtype=torch.int, device=device),
        connected_voxel.reshape(-1, 3)
    ], dim=1)
    connected_voxel_indices = _C.hashmap_lookup_3d_cuda(*hashmap, connected_voxel_hash_key, *grid_size.tolist()).reshape(M, 4).int()
    # Keep only the edges for which all four neighbouring voxels were found.
    connected_voxel_valid = (connected_voxel_indices != 0xffffffff).all(dim=1)
    quad_indices = connected_voxel_indices[connected_voxel_valid].int()                # (L, 4)
    L = quad_indices.shape[0]

    # Vertex positions: voxel coordinate plus dual offset, scaled into the AABB.
    mesh_vertices = (coords.float() + dual_vertices) * voxel_size + aabb[0].reshape(1, 3)

    # Construct triangles
    if not train:
        if split_weight is None:
            def _split_alignment(triangles):
                # `triangles` is (L, 6): columns 0..2 are the first triangle of the
                # split and columns 3..5 the second. The previous code read columns
                # 1..3 for the second normal, which never described the second
                # triangle and was identically zero for split 2.
                a0, a1, a2, b0, b1, b2 = (mesh_vertices[triangles[:, k]] for k in range(6))
                # dim is explicit: without it torch.cross uses the first dimension
                # of size 3, which is the quad dimension when exactly 3 quads exist.
                normal_a = torch.cross(a1 - a0, a2 - a0, dim=1)
                normal_b = torch.cross(b1 - b0, b2 - b0, dim=1)
                return (normal_a * normal_b).sum(dim=1, keepdim=True).abs()

            attempt_triangles_1 = quad_indices[:, quad_split_1]
            attempt_triangles_2 = quad_indices[:, quad_split_2]
            align_1 = _split_alignment(attempt_triangles_1)
            align_2 = _split_alignment(attempt_triangles_2)
            # select split
            mesh_triangles = torch.where(align_1 > align_2, attempt_triangles_1, attempt_triangles_2).reshape(-1, 3)
        else:
            split_weight_ws = split_weight[quad_indices]
            split_weight_ws_02 = split_weight_ws[:, 0] * split_weight_ws[:, 2]
            split_weight_ws_13 = split_weight_ws[:, 1] * split_weight_ws[:, 3]
            mesh_triangles = torch.where(
                split_weight_ws_02 > split_weight_ws_13,
                quad_indices[:, quad_split_1],
                quad_indices[:, quad_split_2]
            ).reshape(-1, 3)
    else:
        assert split_weight is not None, "split_weight must be provided in training mode"
        quad_vs = mesh_vertices[quad_indices]
        mean_v02 = (quad_vs[:, 0] + quad_vs[:, 2]) / 2
        mean_v13 = (quad_vs[:, 1] + quad_vs[:, 3]) / 2
        split_weight_ws = split_weight[quad_indices]
        split_weight_ws_02 = split_weight_ws[:, 0] * split_weight_ws[:, 2]
        split_weight_ws_13 = split_weight_ws[:, 1] * split_weight_ws[:, 3]
        mid_vertices = (
            split_weight_ws_02 * mean_v02 +
            split_weight_ws_13 * mean_v13
        ) / (split_weight_ws_02 + split_weight_ws_13)
        # The extra per-quad vertices are appended after the N dual vertices, so
        # quad i refers to its extra vertex as index N + i (column 4 below).
        mesh_vertices = torch.cat([mesh_vertices, mid_vertices], dim=0)
        quad_indices = torch.cat([quad_indices, torch.arange(N, N + L, device=device).unsqueeze(1)], dim=1)
        mesh_triangles = quad_indices[:, quad_split_train].reshape(-1, 3)

    return mesh_vertices, mesh_triangles
def is_power_of_two(n: int) -> bool:
    """Return True when ``n`` is positive and has exactly one bit set."""
    if not n > 0:
        return False
    # Clearing the lowest set bit leaves zero only for powers of two.
    return (n & (n - 1)) == 0


def nearest_power_of_two(n: int) -> int:
    """
    Return the power of two closest to ``n``.

    A value exactly halfway between two powers of two resolves to the larger one.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be >= 1")
    if is_power_of_two(n):
        return n

    bits = n.bit_length()
    below = 1 << (bits - 1)   # largest power of two not exceeding n
    above = 1 << bits         # smallest power of two exceeding n
    gap_down = n - below
    gap_up = above - n
    return below if gap_down < gap_up else above
+ verbose (bool): Whether to print the settings. + timing (bool): Whether to print the timing information. + + Returns: + torch.Tensor: The indices of the voxels that are occupied by the mesh. + Dict[str, torch.Tensor]: A dictionary containing the following keys: + - "base_color": The base color of the occupied voxels. + - "metallic": The metallic value of the occupied voxels. + - "roughness": The roughness value of the occupied voxels. + - "emissive": The emissive value of the occupied voxels. + - "alpha": The alpha value of the occupied voxels. + - "normal": The normal of the occupied voxels. + """ + + # Load mesh + if isinstance(mesh, str): + mesh = trimesh.load(mesh) + if isinstance(mesh, trimesh.Scene): + groups = mesh.dump() + if isinstance(mesh, trimesh.Trimesh): + groups = [mesh] + scene = trimesh.Scene(groups) + + # Voxelize settings + assert voxel_size is not None or grid_size is not None, "Either voxel_size or grid_size must be provided" + + if voxel_size is not None: + if isinstance(voxel_size, float): + voxel_size = [voxel_size, voxel_size, voxel_size] + if isinstance(voxel_size, (list, tuple)): + voxel_size = np.array(voxel_size) + if isinstance(voxel_size, np.ndarray): + voxel_size = torch.tensor(voxel_size, dtype=torch.float32) + assert isinstance(voxel_size, torch.Tensor), f"voxel_size must be a float, list, tuple, np.ndarray, or torch.Tensor, but got {type(voxel_size)}" + assert voxel_size.dim() == 1, f"voxel_size must be a 1D tensor, but got {voxel_size.shape}" + assert voxel_size.size(0) == 3, f"voxel_size must have 3 elements, but got {voxel_size.size(0)}" + + if grid_size is not None: + if isinstance(grid_size, int): + grid_size = [grid_size, grid_size, grid_size] + if isinstance(grid_size, (list, tuple)): + grid_size = np.array(grid_size) + if isinstance(grid_size, np.ndarray): + grid_size = torch.tensor(grid_size, dtype=torch.int32) + assert isinstance(grid_size, torch.Tensor), f"grid_size must be an int, list, tuple, np.ndarray, or 
torch.Tensor, but got {type(grid_size)}" + assert grid_size.dim() == 1, f"grid_size must be a 1D tensor, but got {grid_size.shape}" + assert grid_size.size(0) == 3, f"grid_size must have 3 elements, but got {grid_size.size(0)}" + + if aabb is not None: + if isinstance(aabb, (list, tuple)): + aabb = np.array(aabb) + if isinstance(aabb, np.ndarray): + aabb = torch.tensor(aabb, dtype=torch.float32) + assert isinstance(aabb, torch.Tensor), f"aabb must be a list, tuple, np.ndarray, or torch.Tensor, but got {type(aabb)}" + assert aabb.dim() == 2, f"aabb must be a 2D tensor, but got {aabb.shape}" + assert aabb.size(0) == 2, f"aabb must have 2 rows, but got {aabb.size(0)}" + assert aabb.size(1) == 3, f"aabb must have 3 columns, but got {aabb.size(1)}" + + # Auto adjust aabb + if aabb is None: + aabb = scene.bounds + min_xyz = aabb[0] + max_xyz = aabb[1] + + if voxel_size is not None: + padding = torch.ceil((max_xyz - min_xyz) / voxel_size) * voxel_size - (max_xyz - min_xyz) + min_xyz -= padding * 0.5 + max_xyz += padding * 0.5 + if grid_size is not None: + padding = (max_xyz - min_xyz) / (grid_size - 1) + min_xyz -= padding * 0.5 + max_xyz += padding * 0.5 + + aabb = torch.stack([min_xyz, max_xyz], dim=0).float() + + # Fill voxel size or grid size + if voxel_size is None: + voxel_size = (aabb[1] - aabb[0]) / grid_size + if grid_size is None: + grid_size = ((aabb[1] - aabb[0]) / voxel_size).round().int() + + grid_range = torch.stack([torch.zeros_like(grid_size), grid_size], dim=0).int() + + # Print settings + if verbose: + print(f"Voxelize settings:") + print(f" Voxel size: {voxel_size}") + print(f" Grid size: {grid_size}") + print(f" AABB: {aabb}") + + # Load Scene + scene_buffers = { + 'triangles': [], + 'normals': [], + 'uvs': [], + 'material_ids': [], + 'base_color_factor': [], + 'base_color_texture': [], + 'metallic_factor': [], + 'metallic_texture': [], + 'roughness_factor': [], + 'roughness_texture': [], + 'emissive_factor': [], + 'emissive_texture': [], + 
'alpha_mode': [], + 'alpha_cutoff': [], + 'alpha_factor': [], + 'alpha_texture': [], + 'normal_texture': [], + } + for sid, (name, g) in tqdm(enumerate(scene.geometry.items()), total=len(scene.geometry), desc="Loading Scene", disable=not verbose): + if verbose: + print(f"Geometry: {name}") + print(f" Visual: {g.visual}") + print(f" Triangles: {g.triangles.shape[0]}") + print(f" Vertices: {g.vertices.shape[0]}") + print(f" Normals: {g.vertex_normals.shape[0]}") + if g.visual.material.baseColorFactor is not None: + print(f" Base color factor: {g.visual.material.baseColorFactor}") + if g.visual.material.baseColorTexture is not None: + print(f" Base color texture: {g.visual.material.baseColorTexture.size} {g.visual.material.baseColorTexture.mode}") + if g.visual.material.metallicFactor is not None: + print(f" Metallic factor: {g.visual.material.metallicFactor}") + if g.visual.material.roughnessFactor is not None: + print(f" Roughness factor: {g.visual.material.roughnessFactor}") + if g.visual.material.metallicRoughnessTexture is not None: + print(f" Metallic roughness texture: {g.visual.material.metallicRoughnessTexture.size} {g.visual.material.metallicRoughnessTexture.mode}") + if g.visual.material.emissiveFactor is not None: + print(f" Emissive factor: {g.visual.material.emissiveFactor}") + if g.visual.material.emissiveTexture is not None: + print(f" Emissive texture: {g.visual.material.emissiveTexture.size} {g.visual.material.emissiveTexture.mode}") + if g.visual.material.alphaMode is not None: + print(f" Alpha mode: {g.visual.material.alphaMode}") + if g.visual.material.alphaCutoff is not None: + print(f" Alpha cutoff: {g.visual.material.alphaCutoff}") + if g.visual.material.normalTexture is not None: + print(f" Normal texture: {g.visual.material.normalTexture.size} {g.visual.material.normalTexture.mode}") + + assert isinstance(g, trimesh.Trimesh), f"Only trimesh.Trimesh is supported, but got {type(g)}" + assert isinstance(g.visual, trimesh.visual.TextureVisuals), 
f"Only trimesh.visual.TextureVisuals is supported, but got {type(g.visual)}" + assert isinstance(g.visual.material, trimesh.visual.material.PBRMaterial), f"Only trimesh.visual.material.PBRMaterial is supported, but got {type(g.visual.material)}" + triangles = torch.tensor(g.triangles, dtype=torch.float32) - aabb[0].reshape(1, 1, 3) # [N, 3, 3] + normals = torch.tensor(g.vertex_normals[g.faces], dtype=torch.float32) # [N, 3, 3] + uvs = torch.tensor(g.visual.uv[g.faces], dtype=torch.float32) if g.visual.uv is not None \ + else torch.zeros(g.triangles.shape[0], 3, 2, dtype=torch.float32) # [N, 3, 2] + baseColorFactor = torch.tensor(g.visual.material.baseColorFactor / 255, dtype=torch.float32) if g.visual.material.baseColorFactor is not None \ + else torch.ones(3, dtype=torch.float32) # [3] + baseColorTexture = torch.tensor(np.array(g.visual.material.baseColorTexture.convert('RGBA'))[..., :3], dtype=torch.uint8) if g.visual.material.baseColorTexture is not None \ + else torch.tensor([]) # [H, W, 3] + metallicFactor = g.visual.material.metallicFactor if g.visual.material.metallicFactor is not None else 1.0 + metallicTexture = torch.tensor(np.array(g.visual.material.metallicRoughnessTexture.convert('RGB'))[..., 2], dtype=torch.uint8) if g.visual.material.metallicRoughnessTexture is not None \ + else torch.tensor([]) # [H, W] + roughnessFactor = g.visual.material.roughnessFactor if g.visual.material.roughnessFactor is not None else 1.0 + roughnessTexture = torch.tensor(np.array(g.visual.material.metallicRoughnessTexture.convert('RGB'))[..., 1], dtype=torch.uint8) if g.visual.material.metallicRoughnessTexture is not None \ + else torch.tensor([]) # [H, W] + emissiveFactor = torch.tensor(g.visual.material.emissiveFactor, dtype=torch.float32) if g.visual.material.emissiveFactor is not None \ + else torch.zeros(3, dtype=torch.float32) # [3] + emissiveTexture = torch.tensor(np.array(g.visual.material.emissiveTexture.convert('RGB'))[..., :3], dtype=torch.uint8) if 
g.visual.material.emissiveTexture is not None \ + else torch.tensor([]) # [H, W, 3] + alphaMode = ALPHA_MODE_ENUM[g.visual.material.alphaMode] if g.visual.material.alphaMode in ALPHA_MODE_ENUM else 0 + alphaCutoff = g.visual.material.alphaCutoff if g.visual.material.alphaCutoff is not None else 0.5 + alphaFactor = g.visual.material.baseColorFactor[3] / 255 if g.visual.material.baseColorFactor is not None else 1.0 + alphaTexture = torch.tensor(np.array(g.visual.material.baseColorTexture.convert('RGBA'))[..., 3], dtype=torch.uint8) if g.visual.material.baseColorTexture is not None and alphaMode != 0 \ + else torch.tensor([]) # [H, W] + normalTexture = torch.tensor(np.array(g.visual.material.normalTexture.convert('RGB'))[..., :3], dtype=torch.uint8) if g.visual.material.normalTexture is not None \ + else torch.tensor([]) # [H, W, 3] + + scene_buffers['triangles'].append(triangles) + scene_buffers['normals'].append(normals) + scene_buffers['uvs'].append(uvs) + scene_buffers['material_ids'].append(torch.full((triangles.shape[0],), sid, dtype=torch.int32)) + scene_buffers['base_color_factor'].append(baseColorFactor) + scene_buffers['base_color_texture'].append(baseColorTexture) + scene_buffers['metallic_factor'].append(metallicFactor) + scene_buffers['metallic_texture'].append(metallicTexture) + scene_buffers['roughness_factor'].append(roughnessFactor) + scene_buffers['roughness_texture'].append(roughnessTexture) + scene_buffers['emissive_factor'].append(emissiveFactor) + scene_buffers['emissive_texture'].append(emissiveTexture) + scene_buffers['alpha_mode'].append(alphaMode) + scene_buffers['alpha_cutoff'].append(alphaCutoff) + scene_buffers['alpha_factor'].append(alphaFactor) + scene_buffers['alpha_texture'].append(alphaTexture) + scene_buffers['normal_texture'].append(normalTexture) + + scene_buffers['triangles'] = torch.cat(scene_buffers['triangles'], dim=0) # [N, 3, 3] + scene_buffers['normals'] = torch.cat(scene_buffers['normals'], dim=0) # [N, 3, 3] + 
scene_buffers['uvs'] = torch.cat(scene_buffers['uvs'], dim=0) # [N, 3, 2] + scene_buffers['material_ids'] = torch.cat(scene_buffers['material_ids'], dim=0) # [N] + + # Voxelize + out_tuple = _C.textured_mesh_to_volumetric_attr_cpu( + voxel_size, + grid_range, + scene_buffers["triangles"], + scene_buffers["normals"], + scene_buffers["uvs"], + scene_buffers["material_ids"], + scene_buffers["base_color_factor"], + scene_buffers["base_color_texture"], + [1] * len(scene_buffers["base_color_texture"]), + [0] * len(scene_buffers["base_color_texture"]), + scene_buffers["metallic_factor"], + scene_buffers["metallic_texture"], + [1] * len(scene_buffers["metallic_texture"]), + [0] * len(scene_buffers["metallic_texture"]), + scene_buffers["roughness_factor"], + scene_buffers["roughness_texture"], + [1] * len(scene_buffers["roughness_texture"]), + [0] * len(scene_buffers["roughness_texture"]), + scene_buffers["emissive_factor"], + scene_buffers["emissive_texture"], + [1] * len(scene_buffers["emissive_texture"]), + [0] * len(scene_buffers["emissive_texture"]), + scene_buffers["alpha_mode"], + scene_buffers["alpha_cutoff"], + scene_buffers["alpha_factor"], + scene_buffers["alpha_texture"], + [1] * len(scene_buffers["alpha_texture"]), + [0] * len(scene_buffers["alpha_texture"]), + scene_buffers["normal_texture"], + [1] * len(scene_buffers["normal_texture"]), + [0] * len(scene_buffers["normal_texture"]), + mip_level_offset, + timing, + ) + + # Post process + coord = out_tuple[0] + attr = { + "base_color": torch.clamp(out_tuple[1] * 255, 0, 255).byte().reshape(-1, 3), + "metallic": torch.clamp(out_tuple[2] * 255, 0, 255).byte().reshape(-1, 1), + "roughness": torch.clamp(out_tuple[3] * 255, 0, 255).byte().reshape(-1, 1), + "emissive": torch.clamp(out_tuple[4] * 255, 0, 255).byte().reshape(-1, 3), + "alpha": torch.clamp(out_tuple[5] * 255, 0, 255).byte().reshape(-1, 1), + "normal": torch.clamp((out_tuple[6] * 0.5 + 0.5) * 255, 0, 255).byte().reshape(-1, 3), + } + + return coord, attr 
+ + +def blender_dump_to_volumetric_attr( + dump: Dict[str, Any], + voxel_size: Union[float, list, tuple, np.ndarray, torch.Tensor] = None, + grid_size: Union[int, list, tuple, np.ndarray, torch.Tensor] = None, + aabb: Union[list, tuple, np.ndarray, torch.Tensor] = None, + mip_level_offset: float = 0.0, + verbose: bool = False, + timing: bool = False, +) -> Union[torch.Tensor, Dict[str, torch.Tensor]]: + """ + Voxelize a mesh into a sparse voxel grid with PBR properties. + + Args: + dump (Dict[str, Any]): Dumped data from a blender scene. + voxel_size (float, list, tuple, np.ndarray, torch.Tensor): The size of each voxel. + grid_size (int, list, tuple, np.ndarray, torch.Tensor): The size of the grid. + NOTE: One of voxel_size and grid_size must be provided. + aabb (list, tuple, np.ndarray, torch.Tensor): The axis-aligned bounding box of the mesh. + If not provided, it will be computed automatically. + mip_level_offset (float): The mip level offset for texture mip level selection. + verbose (bool): Whether to print the settings. + timing (bool): Whether to print the timing information. + + Returns: + torch.Tensor: The indices of the voxels that are occupied by the mesh. + Dict[str, torch.Tensor]: A dictionary containing the following keys: + - "base_color": The base color of the occupied voxels. + - "metallic": The metallic value of the occupied voxels. + - "roughness": The roughness value of the occupied voxels. + - "emissive": The emissive value of the occupied voxels. + - "alpha": The alpha value of the occupied voxels. + - "normal": The normal of the occupied voxels. 
+ """ + # Voxelize settings + assert voxel_size is not None or grid_size is not None, "Either voxel_size or grid_size must be provided" + + if voxel_size is not None: + if isinstance(voxel_size, float): + voxel_size = [voxel_size, voxel_size, voxel_size] + if isinstance(voxel_size, (list, tuple)): + voxel_size = np.array(voxel_size) + if isinstance(voxel_size, np.ndarray): + voxel_size = torch.tensor(voxel_size, dtype=torch.float32) + assert isinstance(voxel_size, torch.Tensor), f"voxel_size must be a float, list, tuple, np.ndarray, or torch.Tensor, but got {type(voxel_size)}" + assert voxel_size.dim() == 1, f"voxel_size must be a 1D tensor, but got {voxel_size.shape}" + assert voxel_size.size(0) == 3, f"voxel_size must have 3 elements, but got {voxel_size.size(0)}" + + if grid_size is not None: + if isinstance(grid_size, int): + grid_size = [grid_size, grid_size, grid_size] + if isinstance(grid_size, (list, tuple)): + grid_size = np.array(grid_size) + if isinstance(grid_size, np.ndarray): + grid_size = torch.tensor(grid_size, dtype=torch.int32) + assert isinstance(grid_size, torch.Tensor), f"grid_size must be an int, list, tuple, np.ndarray, or torch.Tensor, but got {type(grid_size)}" + assert grid_size.dim() == 1, f"grid_size must be a 1D tensor, but got {grid_size.shape}" + assert grid_size.size(0) == 3, f"grid_size must have 3 elements, but got {grid_size.size(0)}" + + if aabb is not None: + if isinstance(aabb, (list, tuple)): + aabb = np.array(aabb) + if isinstance(aabb, np.ndarray): + aabb = torch.tensor(aabb, dtype=torch.float32) + assert isinstance(aabb, torch.Tensor), f"aabb must be a list, tuple, np.ndarray, or torch.Tensor, but got {type(aabb)}" + assert aabb.dim() == 2, f"aabb must be a 2D tensor, but got {aabb.shape}" + assert aabb.size(0) == 2, f"aabb must have 2 rows, but got {aabb.size(0)}" + assert aabb.size(1) == 3, f"aabb must have 3 columns, but got {aabb.size(1)}" + + # Auto adjust aabb + if aabb is None: + min_xyz = np.min([ + 
object['vertices'].min(axis=0) + for object in dump['objects'] + ], axis=0) + max_xyz = np.max([ + object['vertices'].max(axis=0) + for object in dump['objects'] + ], axis=0) + + if voxel_size is not None: + padding = torch.ceil((max_xyz - min_xyz) / voxel_size) * voxel_size - (max_xyz - min_xyz) + min_xyz -= padding * 0.5 + max_xyz += padding * 0.5 + if grid_size is not None: + padding = (max_xyz - min_xyz) / (grid_size - 1) + min_xyz -= padding * 0.5 + max_xyz += padding * 0.5 + + aabb = torch.stack([min_xyz, max_xyz], dim=0).float() + + # Fill voxel size or grid size + if voxel_size is None: + voxel_size = (aabb[1] - aabb[0]) / grid_size + if grid_size is None: + grid_size = ((aabb[1] - aabb[0]) / voxel_size).round().int() + + grid_range = torch.stack([torch.zeros_like(grid_size), grid_size], dim=0).int() + + # Print settings + if verbose: + print(f"Voxelize settings:") + print(f" Voxel size: {voxel_size}") + print(f" Grid size: {grid_size}") + print(f" AABB: {aabb}") + + # Load Scene + scene_buffers = { + 'triangles': [], + 'normals': [], + 'uvs': [], + 'material_ids': [], + 'base_color_factor': [], + 'base_color_texture': [], + 'base_color_texture_filter': [], + 'base_color_texture_wrap': [], + 'metallic_factor': [], + 'metallic_texture': [], + 'metallic_texture_filter': [], + 'metallic_texture_wrap': [], + 'roughness_factor': [], + 'roughness_texture': [], + 'roughness_texture_filter': [], + 'roughness_texture_wrap': [], + 'alpha_mode': [], + 'alpha_cutoff': [], + 'alpha_factor': [], + 'alpha_texture': [], + 'alpha_texture_filter': [], + 'alpha_texture_wrap': [], + } + + def load_texture(pack): + png_bytes = pack['image'] + image = Image.open(io.BytesIO(png_bytes)) + if image.width != image.height or not is_power_of_two(image.width): + size = nearest_power_of_two(max(image.width, image.height)) + image = image.resize((size, size), Image.LANCZOS) + texture = torch.tensor(np.array(image), dtype=torch.uint8) + filter_mode = { + 'Linear': 1, + 'Closest': 0, + 
'Cubic': 1, + 'Smart': 1, + }[pack['interpolation']] + wrap_mode = { + 'REPEAT': 0, + 'EXTEND': 1, + 'CLIP': 1, + 'MIRROR': 2, + }[pack['extension']] + return texture, filter_mode, wrap_mode + + for material in dump['materials']: + baseColorFactor = torch.tensor(material['baseColorFactor'][:3], dtype=torch.float32) + if material['baseColorTexture'] is not None: + baseColorTexture, baseColorTextureFilter, baseColorTextureWrap = \ + load_texture(material['baseColorTexture']) + assert baseColorTexture.shape[2] == 3, f"Base color texture must have 3 channels, but got {baseColorTexture.shape[2]}" + else: + baseColorTexture = torch.tensor([]) + baseColorTextureFilter = 0 + baseColorTextureWrap = 0 + scene_buffers['base_color_factor'].append(baseColorFactor) + scene_buffers['base_color_texture'].append(baseColorTexture) + scene_buffers['base_color_texture_filter'].append(baseColorTextureFilter) + scene_buffers['base_color_texture_wrap'].append(baseColorTextureWrap) + + metallicFactor = material['metallicFactor'] + if material['metallicTexture'] is not None: + metallicTexture, metallicTextureFilter, metallicTextureWrap = \ + load_texture(material['metallicTexture']) + assert metallicTexture.dim() == 2, f"Metallic roughness texture must have 2 dimensions, but got {metallicTexture.dim()}" + else: + metallicTexture = torch.tensor([]) + metallicTextureFilter = 0 + metallicTextureWrap = 0 + scene_buffers['metallic_factor'].append(metallicFactor) + scene_buffers['metallic_texture'].append(metallicTexture) + scene_buffers['metallic_texture_filter'].append(metallicTextureFilter) + scene_buffers['metallic_texture_wrap'].append(metallicTextureWrap) + + roughnessFactor = material['roughnessFactor'] + if material['roughnessTexture'] is not None: + roughnessTexture, roughnessTextureFilter, roughnessTextureWrap = \ + load_texture(material['roughnessTexture']) + assert roughnessTexture.dim() == 2, f"Metallic roughness texture must have 2 dimensions, but got {roughnessTexture.dim()}" + 
else: + roughnessTexture = torch.tensor([]) + roughnessTextureFilter = 0 + roughnessTextureWrap = 0 + scene_buffers['roughness_factor'].append(roughnessFactor) + scene_buffers['roughness_texture'].append(roughnessTexture) + scene_buffers['roughness_texture_filter'].append(roughnessTextureFilter) + scene_buffers['roughness_texture_wrap'].append(roughnessTextureWrap) + + alphaMode = ALPHA_MODE_ENUM[material['alphaMode']] + alphaCutoff = material['alphaCutoff'] + alphaFactor = material['alphaFactor'] + if material['alphaTexture'] is not None: + alphaTexture, alphaTextureFilter, alphaTextureWrap = \ + load_texture(material['alphaTexture']) + assert alphaTexture.dim() == 2, f"Alpha texture must have 2 dimensions, but got {alphaTexture.dim()}" + else: + alphaTexture = torch.tensor([]) + alphaTextureFilter = 0 + alphaTextureWrap = 0 + scene_buffers['alpha_mode'].append(alphaMode) + scene_buffers['alpha_cutoff'].append(alphaCutoff) + scene_buffers['alpha_factor'].append(alphaFactor) + scene_buffers['alpha_texture'].append(alphaTexture) + scene_buffers['alpha_texture_filter'].append(alphaTextureFilter) + scene_buffers['alpha_texture_wrap'].append(alphaTextureWrap) + + for object in dump['objects']: + triangles = torch.tensor(object['vertices'][object['faces']], dtype=torch.float32).reshape(-1, 3, 3) - aabb[0].reshape(1, 1, 3) + normails = torch.tensor(object['normals'], dtype=torch.float32) + uvs = torch.tensor(object['uvs'], dtype=torch.float32) if object['uvs'] is not None else torch.zeros(triangles.shape[0], 3, 2, dtype=torch.float32) + material_id = torch.tensor(object['mat_ids'], dtype=torch.int32) + scene_buffers['triangles'].append(triangles) + scene_buffers['normals'].append(normails) + scene_buffers['uvs'].append(uvs) + scene_buffers['material_ids'].append(material_id) + + scene_buffers['triangles'] = torch.cat(scene_buffers['triangles'], dim=0) # [N, 3, 3] + scene_buffers['normals'] = torch.cat(scene_buffers['normals'], dim=0) # [N, 3, 3] + scene_buffers['uvs'] = 
torch.cat(scene_buffers['uvs'], dim=0) # [N, 3, 2] + scene_buffers['material_ids'] = torch.cat(scene_buffers['material_ids'], dim=0) # [N] + + scene_buffers['uvs'][:, :, 1] = 1 - scene_buffers['uvs'][:, :, 1] # Flip v coordinate + + # Voxelize + out_tuple = _C.textured_mesh_to_volumetric_attr_cpu( + voxel_size, + grid_range, + scene_buffers["triangles"], + scene_buffers["normals"], + scene_buffers["uvs"], + scene_buffers["material_ids"], + scene_buffers["base_color_factor"], + scene_buffers["base_color_texture"], + scene_buffers["base_color_texture_filter"], + scene_buffers["base_color_texture_wrap"], + scene_buffers["metallic_factor"], + scene_buffers["metallic_texture"], + scene_buffers["metallic_texture_filter"], + scene_buffers["metallic_texture_wrap"], + scene_buffers["roughness_factor"], + scene_buffers["roughness_texture"], + scene_buffers["roughness_texture_filter"], + scene_buffers["roughness_texture_wrap"], + [torch.zeros(3, dtype=torch.float32) for _ in range(len(scene_buffers["base_color_texture"]))], + [torch.tensor([]) for _ in range(len(scene_buffers["base_color_texture"]))], + [0] * len(scene_buffers["base_color_texture"]), + [0] * len(scene_buffers["base_color_texture"]), + scene_buffers["alpha_mode"], + scene_buffers["alpha_cutoff"], + scene_buffers["alpha_factor"], + scene_buffers["alpha_texture"], + scene_buffers["alpha_texture_filter"], + scene_buffers["alpha_texture_wrap"], + [torch.tensor([]) for _ in range(len(scene_buffers["base_color_texture"]))], + [0] * len(scene_buffers["base_color_texture"]), + [0] * len(scene_buffers["base_color_texture"]), + mip_level_offset, + timing, + ) + + # Post process + coord = out_tuple[0] + attr = { + "base_color": torch.clamp(out_tuple[1] * 255, 0, 255).byte().reshape(-1, 3), + "metallic": torch.clamp(out_tuple[2] * 255, 0, 255).byte().reshape(-1, 1), + "roughness": torch.clamp(out_tuple[3] * 255, 0, 255).byte().reshape(-1, 1), + "emissive": torch.clamp(out_tuple[4] * 255, 0, 255).byte().reshape(-1, 3), + 
"alpha": torch.clamp(out_tuple[5] * 255, 0, 255).byte().reshape(-1, 1), + "normal": torch.clamp((out_tuple[6] * 0.5 + 0.5) * 255, 0, 255).byte().reshape(-1, 3), + } + + return coord, attr \ No newline at end of file diff --git a/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/io/__init__.py b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/io/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..db7eca220accb46c62dfac93f078c2938969866a --- /dev/null +++ b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/io/__init__.py @@ -0,0 +1,45 @@ +from typing import Dict, Union +import torch +from .ply import * +from .npz import * +from .vxz import * + + +def read(file_path: str) -> Union[torch.Tensor, Dict[str, torch.Tensor]]: + """ + Read a file containing voxels. + + Args: + file_path: Path to the file. + + Returns: + torch.Tensor: the coordinates of the voxels. + Dict[str, torch.Tensor]: the attributes of the voxels. + """ + if file_path.endswith('.npz'): + return read_npz(file_path) + elif file_path.endswith('.ply'): + return read_ply(file_path) + elif file_path.endswith('.vxz'): + return read_vxz(file_path) + else: + raise ValueError(f"Unsupported file type {file_path}") + + +def write(file_path: str, coord: torch.Tensor, attr: Dict[str, torch.Tensor], **kwargs): + """ + Write a file containing voxels. + + Args: + file_path: Path to the file. + coord: the coordinates of the voxels. + attr: the attributes of the voxels. 
+ """ + if file_path.endswith('.npz'): + write_npz(file_path, coord, attr, **kwargs) + elif file_path.endswith('.ply'): + write_ply(file_path, coord, attr, **kwargs) + elif file_path.endswith('.vxz'): + write_vxz(file_path, coord, attr, **kwargs) + else: + raise ValueError(f"Unsupported file type {file_path}") diff --git a/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/io/npz.py b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/io/npz.py new file mode 100644 index 0000000000000000000000000000000000000000..17da9efe2b937540282cbaf25c385f19d0848be9 --- /dev/null +++ b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/io/npz.py @@ -0,0 +1,43 @@ +from typing import * +import torch +import numpy as np + + +__all__ = [ + "read_npz", + "write_npz", +] + + +def read_npz(file) -> Union[torch.Tensor, Dict[str, torch.Tensor]]: + """ + Read a NPZ file containing voxels. + + Args: + file_path: Path or file object from which to read the NPZ file. + + Returns: + torch.Tensor: the coordinates of the voxels. + Dict[str, torch.Tensor]: the attributes of the voxels. + """ + data = np.load(file) + coord = torch.from_numpy(data['coord']).int() + attr = {k: torch.from_numpy(v) for k, v in data.items() if k!= 'coord'} + return coord, attr + + +def write_npz(file, coord: torch.Tensor, attr: Dict[str, torch.Tensor], compress=True): + """ + Write a NPZ file containing voxels. + + Args: + file_path: Path or file object to which to write the NPZ file. + coord: the coordinates of the voxels. + attr: the attributes of the voxels. 
+ """ + data = {'coord': coord.cpu().numpy().astype(np.uint16)} + data.update({k: v.cpu().numpy() for k, v in attr.items()}) + if compress: + np.savez_compressed(file, **data) + else: + np.savez(file, **data) diff --git a/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/io/ply.py b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/io/ply.py new file mode 100644 index 0000000000000000000000000000000000000000..747693218fabfaca994e1f23707878c3cae7b4c9 --- /dev/null +++ b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/io/ply.py @@ -0,0 +1,72 @@ +from typing import * +import io +import torch +import numpy as np +import plyfile + + +__all__ = [ + "read_ply", + "write_ply", +] + + +DTYPE_MAP = { + torch.uint8: 'u1', + torch.uint16: 'u2', + torch.uint32: 'u4', + torch.int8: 'i1', + torch.int16: 'i2', + torch.int32: 'i4', + torch.float32: 'f4', + torch.float64: 'f8' +} + + +def read_ply(file) -> Union[torch.Tensor, Dict[str, torch.Tensor]]: + """ + Read a PLY file containing voxels. + + Args: + file: Path or file-like object of the PLY file. + + Returns: + torch.Tensor: the coordinates of the voxels. + Dict[str, torch.Tensor]: the attributes of the voxels. + """ + plydata = plyfile.PlyData.read(file) + xyz = np.stack([plydata.elements[0][k] for k in ['x', 'y', 'z']], axis=1) + coord = np.round(xyz).astype(int) + coord = torch.from_numpy(coord) + + attr_keys = [k for k in plydata.elements[0].data.dtype.names if k not in ['x', 'y', 'z']] + attr_names = ['_'.join(k.split('_')[:-1]) for k in attr_keys] + attr_chs = [sum([1 for k in attr_keys if k.startswith(f'{name}_')]) for name in attr_names] + + attr = {} + for i, name in enumerate(attr_names): + attr[name] = np.stack([plydata.elements[0][f'{name}_{j}'] for j in range(attr_chs[i])], axis=1) + attr = {k: torch.from_numpy(v) for k, v in attr.items()} + + return coord, attr + + +def write_ply(file, coord: torch.Tensor, attr: Dict[str, torch.Tensor]): + """ + Write a PLY file containing voxels. 
+ + Args: + file: Path or file-like object of the PLY file. + coord: the coordinates of the voxels. + attr: the attributes of the voxels. + """ + dtypes = [('x', 'f4'), ('y', 'f4'), ('z', 'f4')] + for k, v in attr.items(): + for j in range(v.shape[-1]): + assert v.dtype in DTYPE_MAP, f"Unsupported data type {v.dtype} for attribute {k}" + dtypes.append((f'{k}_{j}', DTYPE_MAP[v.dtype])) + data = np.empty(len(coord), dtype=dtypes) + all_chs = np.concatenate([coord.cpu().numpy().astype(np.float32)] + [v.cpu().numpy() for v in attr.values()], axis=1) + data[:] = list(map(tuple, all_chs)) + plyfile.PlyData([plyfile.PlyElement.describe(data, 'vertex')]).write(file) + \ No newline at end of file diff --git a/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/io/vxz.py b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/io/vxz.py new file mode 100644 index 0000000000000000000000000000000000000000..91fba74d26d2edaabbcd7edb707d272cbca25b8a --- /dev/null +++ b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/io/vxz.py @@ -0,0 +1,365 @@ +from typing import * +import os +import json +import struct +import torch +import numpy as np +import zlib +import lzma +import zstandard +from concurrent.futures import ThreadPoolExecutor +from ..serialize import encode_seq, decode_seq +from .. import _C + + +__all__ = [ + "read_vxz", + "read_vxz_info", + "write_vxz", +] + + +""" +VXZ format + +Header: +- file type (3 bytes) - 'VXZ' +- version (1 byte) - 0 +- binary start offset (4 bytes) +- structure (json) - +{ + "num_voxel": int, + "chunk_size": int, + "filter": str, + "compression": str, + "compression_level": int, + "raw_size": int, + "compressed_size": int, + "compress_ratio": float, + "attr_interleave": str, + "attr": [ + {"name": str, "chs": int}, + ... + ] + "chunks": [ + { + "ptr": [offset, length], # offset from global binary start + "svo": [offset, length], # offset from this chunk start + "attr": [offset, length], # offset from this chunk start + }, + ... 
+ ] +} +- binary data +""" + +DEFAULT_COMPRESION_LEVEL = { + 'none': 0, + 'deflate': 9, + 'lzma': 9, + 'zstd': 22, +} + + +def _compress(data: bytes, algo: Literal['none', 'deflate', 'lzma', 'zstd'], level: int) -> bytes: + if algo == 'none': + return data + if level is None: + level = DEFAULT_COMPRESION_LEVEL[algo] + if algo == 'deflate': + compresser = zlib.compressobj(level, wbits=-15) + return compresser.compress(data) + compresser.flush() + if algo == 'lzma': + compresser = lzma.LZMACompressor(format=lzma.FORMAT_RAW, filters=[{'id': lzma.FILTER_LZMA2, 'preset': level}]) + return compresser.compress(data) + compresser.flush() + if algo == 'zstd': + compresser = zstandard.ZstdCompressor(level=level, write_checksum=False, write_content_size=True, threads=-1) + return compresser.compress(data) + raise ValueError(f"Invalid compression algorithm: {algo}") + + +def _decompress(data: bytes, algo: Literal['none', 'deflate', 'lzma', 'zstd'], level: int) -> bytes: + if algo == 'none': + return data + if level is None: + level = DEFAULT_COMPRESION_LEVEL[algo] + if algo == 'deflate': + decompresser = zlib.decompressobj(wbits=-15) + return decompresser.decompress(data) + decompresser.flush() + if algo == 'lzma': + decompresser = lzma.LZMADecompressor(format=lzma.FORMAT_RAW, filters=[{'id': lzma.FILTER_LZMA2, 'preset': level}]) + return decompresser.decompress(data) + if algo == 'zstd': + decompresser = zstandard.ZstdDecompressor(format=zstandard.FORMAT_ZSTD1) + return decompresser.decompress(data) + raise ValueError(f"Invalid compression algorithm: {algo}") + + +def read_vxz_info(file) -> Dict: + """ + Read the header of a VXZ file without decompressing the binary data. + + Args: + file_path: Path or file-like object to the VXZ file. + + Returns: + Dict: the header of the VXZ file. 
+ """ + if isinstance(file, str): + with open(file, 'rb') as f: + file_data = f.read() + else: + file_data = file.read() + + assert file_data[:3] == b'VXZ', "Invalid file type" + version = file_data[3] + assert version == 0, "Invalid file version" + + bin_start = struct.unpack('>I', file_data[4:8])[0] + structure_data = json.loads(file_data[8:bin_start].decode()) + return structure_data + + +def read_vxz(file, num_threads: int = -1) -> Union[torch.Tensor, Dict[str, torch.Tensor]]: + """ + Read a VXZ file containing voxels. + + Args: + file_path: Path or file-like object to the VXZ file. + num_threads: the number of threads to use for reading the file. + + Returns: + torch.Tensor: the coordinates of the voxels. + Dict[str, torch.Tensor]: the attributes of the voxels. + """ + if isinstance(file, str): + with open(file, 'rb') as f: + file_data = f.read() + else: + file_data = file.read() + + num_threads = num_threads if num_threads > 0 else os.cpu_count() + + # Parse header + assert file_data[:3] == b'VXZ', "Invalid file type" + version = file_data[3] + assert version == 0, "Invalid file version" + + bin_start = struct.unpack('>I', file_data[4:8])[0] + structure_data = json.loads(file_data[8:bin_start].decode()) + bin_data = file_data[bin_start:] + + # Decode chunks + chunk_size = structure_data['chunk_size'] + chunk_depth = np.log2(chunk_size) + assert chunk_depth.is_integer(), f"Chunk size must be a power of 2, got {chunk_size}" + chunk_depth = int(chunk_depth) + + def worker(chunk_info): + decompressed = {} + chunk_data = bin_data[chunk_info['ptr'][0]:chunk_info['ptr'][0]+chunk_info['ptr'][1]] + for k, v in chunk_info.items(): + if k in ['ptr', 'idx']: + continue + decompressed[k] = np.frombuffer(_decompress(chunk_data[v[0]:v[0]+v[1]], structure_data['compression'], structure_data['compression_level']), dtype=np.uint8) + svo = torch.tensor(np.frombuffer(decompressed['svo'], dtype=np.uint8)) + morton_code = _C.decode_sparse_voxel_octree_cpu(svo, chunk_depth) + coord 
= decode_seq(morton_code.int()).cpu() + + # deinterleave attributes + if structure_data['attr_interleave'] == 'none': + all_attr = [] + for k, chs in structure_data['attr']: + for i in range(chs): + all_attr.append(torch.tensor(decompressed[f'{k}_{i}'])) + all_attr = torch.stack(all_attr, dim=1) + elif structure_data['attr_interleave'] == 'as_is': + all_attr = [] + for k, chs in structure_data['attr']: + all_attr.append(torch.tensor(decompressed[k].reshape(-1, chs))) + all_attr = torch.cat(all_attr, dim=1) + elif structure_data['attr_interleave'] == 'all': + all_chs = sum(chs for k, chs in structure_data['attr']) + all_attr = decompressed['attr'].reshape(-1, all_chs) + + # unfilter + if structure_data['filter'] == 'none': + pass + elif structure_data['filter'] == 'parent': + all_attr = _C.decode_sparse_voxel_octree_attr_parent_cpu(svo, chunk_depth, all_attr) + elif structure_data['filter'] == 'neighbor': + all_attr = _C.decode_sparse_voxel_octree_attr_neighbor_cpu(coord, chunk_size, all_attr) + + # final + attr = {} + ch = 0 + for k, chs in structure_data['attr']: + attr[k] = all_attr[:, ch:ch+chs] + ch += chs + return { + 'coord': coord, + 'attr': attr, + } + + if num_threads == 1: + chunks = [worker(info) for info in structure_data['chunks']] + else: + with ThreadPoolExecutor(max_workers=num_threads) as executor: + chunks = list(executor.map(worker, structure_data['chunks'])) + + # Combine chunks + coord = [] + attr = {k: [] for k, _ in structure_data['attr']} + for info, chunk in zip(structure_data['chunks'], chunks): + coord.append(chunk['coord'] + torch.tensor([[info['idx'][0] * chunk_size, info['idx'][1] * chunk_size, info['idx'][2] * chunk_size]]).int()) + for k, v in chunk['attr'].items(): + attr[k].append(v) + coord = torch.cat(coord, dim=0) + for k, v in attr.items(): + attr[k] = torch.cat(v, dim=0) + return coord, attr + + +def write_vxz( + file, + coord: torch.Tensor, + attr: Dict[str, torch.Tensor], + chunk_size: int = 256, + filter: Literal['none', 
'parent', 'neighbor'] = 'none', + compression: Literal['none', 'deflate', 'lzma', 'zstd'] = 'lzma', + compression_level: Optional[int] = None, + attr_interleave: Literal['none', 'as_is', 'all'] = 'as_is', + num_threads: int = -1, +): + """ + Write a VXZ file containing voxels. + + Args: + file: Path or file-like object to the VXZ file. + coord: the coordinates of the voxels. + attr: the attributes of the voxels. + chunk_size: the size of each chunk. + filter: the filter to apply to the voxels. + compression: the compression algorithm to use. + compression_level: the level of compression. + attr_interleave: how to interleave the attributes. + num_threads: the number of threads to use for compression. + """ + # Check + for k, v in attr.items(): + assert coord.shape[0] == v.shape[0], f"Number of coordinates and attributes do not match for key {k}" + assert v.dtype == torch.uint8, f"Attributes must be uint8, got {v.dtype} for key {k}" + assert attr_interleave in ['none', 'as_is', 'all'], f"Invalid attr_interleave value: {attr_interleave}" + + compression_level = compression_level or DEFAULT_COMPRESION_LEVEL[compression] + num_threads = num_threads if num_threads > 0 else os.cpu_count() + + file_info = { + 'num_voxel': coord.shape[0], + 'chunk_size': chunk_size, + 'filter': filter, + 'compression': compression, + 'compression_level': compression_level, + 'raw_size': sum([coord.numel() * 4] + [v.numel() for v in attr.values()]), + 'compressed_size': 0, + 'compress_ratio': 0.0, + 'attr_interleave': attr_interleave, + 'attr': [[k, v.shape[1]] for k, v in attr.items()], + 'chunks': [], + } + bin_data = b'' + + # Split into chunks + chunk_depth = np.log2(chunk_size) + assert chunk_depth.is_integer(), f"Chunk size must be a power of 2, got {chunk_size}" + chunk_depth = int(chunk_depth) + + chunk_coord = coord // chunk_size + coord = coord % chunk_size + unique_chunk_coord, inverse = torch.unique(chunk_coord, dim=0, return_inverse=True) + + chunks = [] + for idx, chunk_xyz in 
enumerate(unique_chunk_coord.tolist()): + chunk_mask = (inverse == idx) + chunks.append({ + 'idx': chunk_xyz, + 'coord': coord[chunk_mask], + 'attr': {k: v[chunk_mask] for k, v in attr.items()}, + }) + + # Compress each chunk + with ThreadPoolExecutor(max_workers=num_threads) as executor: + def worker(chunk): + ## compress to binary + coord = chunk['coord'] + morton_code = encode_seq(coord) + sorted_idx = morton_code.argsort().cpu() + coord = coord.cpu()[sorted_idx] + morton_code = morton_code.cpu()[sorted_idx] + attr = torch.cat([v.cpu()[sorted_idx] for v in chunk['attr'].values()], dim=1) + svo = _C.encode_sparse_voxel_octree_cpu(morton_code, chunk_depth) + svo_bytes = _compress(svo.numpy().tobytes(), compression, compression_level) + + # filter + if filter == 'none': + attr = attr.numpy() + elif filter == 'parent': + attr = _C.encode_sparse_voxel_octree_attr_parent_cpu(svo, chunk_depth, attr).numpy() + elif filter == 'neighbor': + attr = _C.encode_sparse_voxel_octree_attr_neighbor_cpu(coord, chunk_size, attr).numpy() + + # interleave attributes + attr_bytes = {} + if attr_interleave == 'none': + ch = 0 + for k, chs in file_info['attr']: + for i in range(chs): + attr_bytes[f'{k}_{i}'] = _compress(attr[:, ch].tobytes(), compression, compression_level) + ch += 1 + elif attr_interleave == 'as_is': + ch = 0 + for k, chs in file_info['attr']: + attr_bytes[k] = _compress(attr[:, ch:ch+chs].tobytes(), compression, compression_level) + ch += chs + elif attr_interleave == 'all': + attr_bytes['attr'] = _compress(attr.tobytes(), compression, compression_level) + + ## buffer for each chunk + chunk_info = {'idx': chunk['idx']} + bin_data = b'' + + ### svo + chunk_info['svo'] = [len(bin_data), len(svo_bytes)] + bin_data += svo_bytes + + ### attr + for k, v in attr_bytes.items(): + chunk_info[k] = [len(bin_data), len(v)] + bin_data += v + + return chunk_info, bin_data + + chunks = list(executor.map(worker, chunks)) + + for chunk_info, chunk_data in chunks: + chunk_info['ptr'] = 
[len(bin_data), len(chunk_data)] + bin_data += chunk_data + file_info['chunks'].append(chunk_info) + + file_info['compressed_size'] = len(bin_data) + file_info['compress_ratio'] = file_info['raw_size'] / file_info['compressed_size'] + + # File parts + structure_data = json.dumps(file_info).encode() + header = b'VXZ\x00' + struct.pack('>I', len(structure_data) + 8) + + # Write to file + if isinstance(file, str): + with open(file, 'wb') as f: + f.write(header) + f.write(structure_data) + f.write(bin_data) + else: + file.write(header) + file.write(structure_data) + file.write(bin_data) diff --git a/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/postprocess.py b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/postprocess.py new file mode 100644 index 0000000000000000000000000000000000000000..217155953d09ffd5393a1756051983e7013e62fb --- /dev/null +++ b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/postprocess.py @@ -0,0 +1,331 @@ +from typing import * +from tqdm import tqdm +import numpy as np +import torch +import cv2 +from PIL import Image +import trimesh +import trimesh.visual +from flex_gemm.ops.grid_sample import grid_sample_3d +import nvdiffrast.torch as dr +import cumesh + + +def to_glb( + vertices: torch.Tensor, + faces: torch.Tensor, + attr_volume: torch.Tensor, + coords: torch.Tensor, + attr_layout: Dict[str, slice], + aabb: Union[list, tuple, np.ndarray, torch.Tensor], + voxel_size: Union[float, list, tuple, np.ndarray, torch.Tensor] = None, + grid_size: Union[int, list, tuple, np.ndarray, torch.Tensor] = None, + decimation_target: int = 1000000, + texture_size: int = 2048, + remesh: bool = False, + remesh_band: float = 1, + remesh_project: float = 0.9, + mesh_cluster_threshold_cone_half_angle_rad=np.radians(90.0), + mesh_cluster_refine_iterations=0, + mesh_cluster_global_iterations=1, + mesh_cluster_smooth_strength=1, + verbose: bool = False, + use_tqdm: bool = False, +): + """ + Convert an extracted mesh to a GLB file. 
+ Performs cleaning, optional remeshing, UV unwrapping, and texture baking from a volume. + + Args: + vertices: (N, 3) tensor of vertex positions + faces: (M, 3) tensor of vertex indices + attr_volume: (L, C) features of a sprase tensor for attribute interpolation + coords: (L, 3) tensor of coordinates for each voxel + attr_layout: dictionary of slice objects for each attribute + aabb: (2, 3) tensor of minimum and maximum coordinates of the volume + voxel_size: (3,) tensor of size of each voxel + grid_size: (3,) tensor of number of voxels in each dimension + decimation_target: target number of vertices for mesh simplification + texture_size: size of the texture for baking + remesh: whether to perform remeshing + remesh_band: size of the remeshing band + remesh_project: projection factor for remeshing + mesh_cluster_threshold_cone_half_angle_rad: threshold for cone-based clustering in uv unwrapping + mesh_cluster_refine_iterations: number of iterations for refining clusters in uv unwrapping + mesh_cluster_global_iterations: number of global iterations for clustering in uv unwrapping + mesh_cluster_smooth_strength: strength of smoothing for clustering in uv unwrapping + verbose: whether to print verbose messages + use_tqdm: whether to use tqdm to display progress bar + """ + # --- Input Normalization (AABB, Voxel Size, Grid Size) --- + if isinstance(aabb, (list, tuple)): + aabb = np.array(aabb) + if isinstance(aabb, np.ndarray): + aabb = torch.tensor(aabb, dtype=torch.float32, device=coords.device) + assert isinstance(aabb, torch.Tensor), f"aabb must be a list, tuple, np.ndarray, or torch.Tensor, but got {type(aabb)}" + assert aabb.dim() == 2, f"aabb must be a 2D tensor, but got {aabb.shape}" + assert aabb.size(0) == 2, f"aabb must have 2 rows, but got {aabb.size(0)}" + assert aabb.size(1) == 3, f"aabb must have 3 columns, but got {aabb.size(1)}" + + # Calculate grid dimensions based on AABB and voxel size + if voxel_size is not None: + if isinstance(voxel_size, 
float): + voxel_size = [voxel_size, voxel_size, voxel_size] + if isinstance(voxel_size, (list, tuple)): + voxel_size = np.array(voxel_size) + if isinstance(voxel_size, np.ndarray): + voxel_size = torch.tensor(voxel_size, dtype=torch.float32, device=coords.device) + grid_size = ((aabb[1] - aabb[0]) / voxel_size).round().int() + else: + assert grid_size is not None, "Either voxel_size or grid_size must be provided" + if isinstance(grid_size, int): + grid_size = [grid_size, grid_size, grid_size] + if isinstance(grid_size, (list, tuple)): + grid_size = np.array(grid_size) + if isinstance(grid_size, np.ndarray): + grid_size = torch.tensor(grid_size, dtype=torch.int32, device=coords.device) + voxel_size = (aabb[1] - aabb[0]) / grid_size + + # Assertions for dimensions + assert isinstance(voxel_size, torch.Tensor) + assert voxel_size.dim() == 1 and voxel_size.size(0) == 3 + assert isinstance(grid_size, torch.Tensor) + assert grid_size.dim() == 1 and grid_size.size(0) == 3 + + if use_tqdm: + pbar = tqdm(total=6, desc="Extracting GLB") + if verbose: + print(f"Original mesh: {vertices.shape[0]} vertices, {faces.shape[0]} faces") + + # Move data to GPU + vertices = vertices.cuda() + faces = faces.cuda() + + # Initialize CUDA mesh handler + mesh = cumesh.CuMesh() + mesh.init(vertices, faces) + + # --- Initial Mesh Cleaning --- + # Fills holes as much as we can before processing + mesh.fill_holes(max_hole_perimeter=3e-2) + if verbose: + print(f"After filling holes: {mesh.num_vertices} vertices, {mesh.num_faces} faces") + vertices, faces = mesh.read() + if use_tqdm: + pbar.update(1) + + # Build BVH for the current mesh to guide remeshing + if use_tqdm: + pbar.set_description("Building BVH") + if verbose: + print(f"Building BVH for current mesh...", end='', flush=True) + bvh = cumesh.cuBVH(vertices, faces) + if use_tqdm: + pbar.update(1) + if verbose: + print("Done") + + if use_tqdm: + pbar.set_description("Cleaning mesh") + if verbose: + print("Cleaning mesh...") + + # --- 
Branch 1: Standard Pipeline (Simplification & Cleaning) --- + if not remesh: + # Step 1: Aggressive simplification (3x target) + mesh.simplify(decimation_target * 3, verbose=verbose) + if verbose: + print(f"After inital simplification: {mesh.num_vertices} vertices, {mesh.num_faces} faces") + + # Step 2: Clean up topology (duplicates, non-manifolds, isolated parts) + mesh.remove_duplicate_faces() + mesh.repair_non_manifold_edges() + mesh.remove_small_connected_components(1e-5) + mesh.fill_holes(max_hole_perimeter=3e-2) + if verbose: + print(f"After initial cleanup: {mesh.num_vertices} vertices, {mesh.num_faces} faces") + + # Step 3: Final simplification to target count + mesh.simplify(decimation_target, verbose=verbose) + if verbose: + print(f"After final simplification: {mesh.num_vertices} vertices, {mesh.num_faces} faces") + + # Step 4: Final Cleanup loop + mesh.remove_duplicate_faces() + mesh.repair_non_manifold_edges() + mesh.remove_small_connected_components(1e-5) + mesh.fill_holes(max_hole_perimeter=3e-2) + if verbose: + print(f"After final cleanup: {mesh.num_vertices} vertices, {mesh.num_faces} faces") + + # Step 5: Unify face orientations + mesh.unify_face_orientations() + + # --- Branch 2: Remeshing Pipeline --- + else: + center = aabb.mean(dim=0) + scale = (aabb[1] - aabb[0]).max().item() + resolution = grid_size.max().item() + + # Perform Dual Contouring remeshing (rebuilds topology) + mesh.init(*cumesh.remeshing.remesh_narrow_band_dc( + vertices, faces, + center = center, + scale = (resolution + 3 * remesh_band) / resolution * scale, + resolution = resolution, + band = remesh_band, + project_back = remesh_project, # Snaps vertices back to original surface + verbose = verbose, + bvh = bvh, + )) + if verbose: + print(f"After remeshing: {mesh.num_vertices} vertices, {mesh.num_faces} faces") + + # Simplify and clean the remeshed result (similar logic to above) + mesh.simplify(decimation_target, verbose=verbose) + if verbose: + print(f"After simplifying: 
{mesh.num_vertices} vertices, {mesh.num_faces} faces") + + if use_tqdm: + pbar.update(1) + if verbose: + print("Done") + + + # --- UV Parameterization --- + if use_tqdm: + pbar.set_description("Parameterizing new mesh") + if verbose: + print("Parameterizing new mesh...") + + out_vertices, out_faces, out_uvs, out_vmaps = mesh.uv_unwrap( + compute_charts_kwargs={ + "threshold_cone_half_angle_rad": mesh_cluster_threshold_cone_half_angle_rad, + "refine_iterations": mesh_cluster_refine_iterations, + "global_iterations": mesh_cluster_global_iterations, + "smooth_strength": mesh_cluster_smooth_strength, + }, + return_vmaps=True, + verbose=verbose, + ) + out_vertices = out_vertices.cuda() + out_faces = out_faces.cuda() + out_uvs = out_uvs.cuda() + out_vmaps = out_vmaps.cuda() + mesh.compute_vertex_normals() + out_normals = mesh.read_vertex_normals()[out_vmaps] + + if use_tqdm: + pbar.update(1) + if verbose: + print("Done") + + # --- Texture Baking (Attribute Sampling) --- + if use_tqdm: + pbar.set_description("Sampling attributes") + if verbose: + print("Sampling attributes...", end='', flush=True) + + # Setup differentiable rasterizer context + ctx = dr.RasterizeCudaContext() + # Prepare UV coordinates for rasterization (rendering in UV space) + uvs_rast = torch.cat([out_uvs * 2 - 1, torch.zeros_like(out_uvs[:, :1]), torch.ones_like(out_uvs[:, :1])], dim=-1).unsqueeze(0) + rast = torch.zeros((1, texture_size, texture_size, 4), device='cuda', dtype=torch.float32) + + # Rasterize in chunks to save memory + for i in range(0, out_faces.shape[0], 100000): + rast_chunk, _ = dr.rasterize( + ctx, uvs_rast, out_faces[i:i+100000], + resolution=[texture_size, texture_size], + ) + mask_chunk = rast_chunk[..., 3:4] > 0 + rast_chunk[..., 3:4] += i # Store face ID in alpha channel + rast = torch.where(mask_chunk, rast_chunk, rast) + + # Mask of valid pixels in texture + mask = rast[0, ..., 3] > 0 + + # Interpolate 3D positions in UV space (finding 3D coord for every texel) + pos = 
dr.interpolate(out_vertices.unsqueeze(0), rast, out_faces)[0][0] + valid_pos = pos[mask] + + # Map these positions back to the *original* high-res mesh to get accurate attributes + # This corrects geometric errors introduced by simplification/remeshing + _, face_id, uvw = bvh.unsigned_distance(valid_pos, return_uvw=True) + orig_tri_verts = vertices[faces[face_id.long()]] # (N_new, 3, 3) + valid_pos = (orig_tri_verts * uvw.unsqueeze(-1)).sum(dim=1) + + # Trilinear sampling from the attribute volume (Color, Material props) + attrs = torch.zeros(texture_size, texture_size, attr_volume.shape[1], device='cuda') + attrs[mask] = grid_sample_3d( + attr_volume, + torch.cat([torch.zeros_like(coords[:, :1]), coords], dim=-1), + shape=torch.Size([1, attr_volume.shape[1], *grid_size.tolist()]), + grid=((valid_pos - aabb[0]) / voxel_size).reshape(1, -1, 3), + mode='trilinear', + ) + if use_tqdm: + pbar.update(1) + if verbose: + print("Done") + + # --- Texture Post-Processing & Material Construction --- + if use_tqdm: + pbar.set_description("Finalizing mesh") + if verbose: + print("Finalizing mesh...", end='', flush=True) + + mask = mask.cpu().numpy() + + # Extract channels based on layout (BaseColor, Metallic, Roughness, Alpha) + base_color = np.clip(attrs[..., attr_layout['base_color']].cpu().numpy() * 255, 0, 255).astype(np.uint8) + metallic = np.clip(attrs[..., attr_layout['metallic']].cpu().numpy() * 255, 0, 255).astype(np.uint8) + roughness = np.clip(attrs[..., attr_layout['roughness']].cpu().numpy() * 255, 0, 255).astype(np.uint8) + alpha = np.clip(attrs[..., attr_layout['alpha']].cpu().numpy() * 255, 0, 255).astype(np.uint8) + alpha_mode = 'OPAQUE' + + # Inpainting: fill gaps (dilation) to prevent black seams at UV boundaries + mask_inv = (~mask).astype(np.uint8) + base_color = cv2.inpaint(base_color, mask_inv, 3, cv2.INPAINT_TELEA) + metallic = cv2.inpaint(metallic, mask_inv, 1, cv2.INPAINT_TELEA)[..., None] + roughness = cv2.inpaint(roughness, mask_inv, 1, 
cv2.INPAINT_TELEA)[..., None] + alpha = cv2.inpaint(alpha, mask_inv, 1, cv2.INPAINT_TELEA)[..., None] + + # Create PBR material + # Standard PBR packs Metallic and Roughness into Blue and Green channels + material = trimesh.visual.material.PBRMaterial( + baseColorTexture=Image.fromarray(np.concatenate([base_color, alpha], axis=-1)), + baseColorFactor=np.array([255, 255, 255, 255], dtype=np.uint8), + metallicRoughnessTexture=Image.fromarray(np.concatenate([np.zeros_like(metallic), roughness, metallic], axis=-1)), + metallicFactor=1.0, + roughnessFactor=1.0, + alphaMode=alpha_mode, + doubleSided=True if not remesh else False, + ) + + # --- Coordinate System Conversion & Final Object --- + vertices_np = out_vertices.cpu().numpy() + faces_np = out_faces.cpu().numpy() + uvs_np = out_uvs.cpu().numpy() + normals_np = out_normals.cpu().numpy() + + # Swap Y and Z axes, invert Y (common conversion for GLB compatibility) + vertices_np[:, 1], vertices_np[:, 2] = vertices_np[:, 2], -vertices_np[:, 1] + normals_np[:, 1], normals_np[:, 2] = normals_np[:, 2], -normals_np[:, 1] + uvs_np[:, 1] = 1 - uvs_np[:, 1] # Flip UV V-coordinate + + textured_mesh = trimesh.Trimesh( + vertices=vertices_np, + faces=faces_np, + vertex_normals=normals_np, + process=False, + visual=trimesh.visual.TextureVisuals(uv=uvs_np, material=material) + ) + + if use_tqdm: + pbar.update(1) + pbar.close() + if verbose: + print("Done") + + return textured_mesh \ No newline at end of file diff --git a/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/rasterize.py b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/rasterize.py new file mode 100644 index 0000000000000000000000000000000000000000..63ae53b61e0cb501eb274b342bc5d337adfabfee --- /dev/null +++ b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/rasterize.py @@ -0,0 +1,111 @@ +import torch +import torch.nn.functional as F +from easydict import EasyDict as edict +from . 
import _C + + +def intrinsics_to_projection( + intrinsics: torch.Tensor, + near: float, + far: float, + ) -> torch.Tensor: + """ + OpenCV intrinsics to OpenGL perspective matrix + + Args: + intrinsics (torch.Tensor): [3, 3] OpenCV intrinsics matrix + near (float): near plane to clip + far (float): far plane to clip + Returns: + (torch.Tensor): [4, 4] OpenGL perspective matrix + """ + fx, fy = intrinsics[0, 0], intrinsics[1, 1] + cx, cy = intrinsics[0, 2], intrinsics[1, 2] + ret = torch.zeros((4, 4), dtype=intrinsics.dtype, device=intrinsics.device) + ret[0, 0] = 2 * fx + ret[1, 1] = 2 * fy + ret[0, 2] = 2 * cx - 1 + ret[1, 2] = - 2 * cy + 1 + ret[2, 2] = far / (far - near) + ret[2, 3] = near * far / (near - far) + ret[3, 2] = 1. + return ret + + +class VoxelRenderer: + """ + Renderer for the Voxel representation. + + Args: + rendering_options (dict): Rendering options. + """ + + def __init__(self, rendering_options={}) -> None: + self.rendering_options = edict({ + "resolution": None, + "near": 0.1, + "far": 10.0, + "ssaa": 1, + }) + self.rendering_options.update(rendering_options) + + def render( + self, + position: torch.Tensor, + attrs: torch.Tensor, + voxel_size: float, + extrinsics: torch.Tensor, + intrinsics: torch.Tensor, + ) -> edict: + """ + Render the octree. 
+ + Args: + position (torch.Tensor): (N, 3) xyz positions + attrs (torch.Tensor): (N, C) attributes + voxel_size (float): voxel size + extrinsics (torch.Tensor): (4, 4) camera extrinsics + intrinsics (torch.Tensor): (3, 3) camera intrinsics + + Returns: + edict containing: + attr (torch.Tensor): (C, H, W) rendered color + depth (torch.Tensor): (H, W) rendered depth + alpha (torch.Tensor): (H, W) rendered alpha + """ + resolution = self.rendering_options["resolution"] + near = self.rendering_options["near"] + far = self.rendering_options["far"] + ssaa = self.rendering_options["ssaa"] + + view = extrinsics + perspective = intrinsics_to_projection(intrinsics, near, far) + camera = torch.inverse(view)[:3, 3] + focalx = intrinsics[0, 0] + focaly = intrinsics[1, 1] + args = ( + position, + attrs, + voxel_size, + view.T.contiguous(), + (perspective @ view).T.contiguous(), + camera, + 0.5 / focalx, + 0.5 / focaly, + resolution * ssaa, + resolution * ssaa, + ) + color, depth, alpha = _C.rasterize_voxels_cuda(*args) + + if ssaa > 1: + color = F.interpolate(color[None], size=(resolution, resolution), mode='bilinear', align_corners=False, antialias=True).squeeze() + depth = F.interpolate(depth[None, None], size=(resolution, resolution), mode='bilinear', align_corners=False, antialias=True).squeeze() + alpha = F.interpolate(alpha[None, None], size=(resolution, resolution), mode='bilinear', align_corners=False, antialias=True).squeeze() + + ret = edict({ + 'attr': color, + 'depth': depth, + 'alpha': alpha, + }) + return ret + \ No newline at end of file diff --git a/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/serialize.py b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/serialize.py new file mode 100644 index 0000000000000000000000000000000000000000..daf7598059ceb40e2aca64f97503c36ae5ccba0a --- /dev/null +++ b/o-voxel/build/lib.win-amd64-cpython-311/o_voxel/serialize.py @@ -0,0 +1,68 @@ +from typing import * +import torch +from . 
import _C + + +@torch.no_grad() +def encode_seq(coords: torch.Tensor, permute: List[int] = [0, 1, 2], mode: Literal['z_order', 'hilbert'] = 'z_order') -> torch.Tensor: + """ + Encodes 3D coordinates into a 30-bit code. + + Args: + coords: a tensor of shape [N, 3] containing the 3D coordinates. + permute: the permutation of the coordinates. + mode: the encoding mode to use. + """ + assert coords.shape[-1] == 3 and coords.ndim == 2, "Input coordinates must be of shape [N, 3]" + x = coords[:, permute[0]].int() + y = coords[:, permute[1]].int() + z = coords[:, permute[2]].int() + if mode == 'z_order': + if coords.device.type == 'cpu': + return _C.z_order_encode_cpu(x, y, z) + elif coords.device.type == 'cuda': + return _C.z_order_encode_cuda(x, y, z) + else: + raise ValueError(f"Unsupported device type: {coords.device.type}") + elif mode == 'hilbert': + if coords.device.type == 'cpu': + return _C.hilbert_encode_cpu(x, y, z) + elif coords.device.type == 'cuda': + return _C.hilbert_encode_cuda(x, y, z) + else: + raise ValueError(f"Unsupported device type: {coords.device.type}") + else: + raise ValueError(f"Unknown encoding mode: {mode}") + + +@torch.no_grad() +def decode_seq(code: torch.Tensor, permute: List[int] = [0, 1, 2], mode: Literal['z_order', 'hilbert'] = 'z_order') -> torch.Tensor: + """ + Decodes a 30-bit code into 3D coordinates. + + Args: + code: a tensor of shape [N] containing the 30-bit code. + permute: the permutation of the coordinates. + mode: the decoding mode to use. 
+ """ + assert code.ndim == 1, "Input code must be of shape [N]" + if mode == 'z_order': + if code.device.type == 'cpu': + coords = _C.z_order_decode_cpu(code) + elif code.device.type == 'cuda': + coords = _C.z_order_decode_cuda(code) + else: + raise ValueError(f"Unsupported device type: {code.device.type}") + elif mode == 'hilbert': + if code.device.type == 'cpu': + coords = _C.hilbert_decode_cpu(code) + elif code.device.type == 'cuda': + coords = _C.hilbert_decode_cuda(code) + else: + raise ValueError(f"Unsupported device type: {code.device.type}") + else: + raise ValueError(f"Unknown decoding mode: {mode}") + x = coords[permute.index(0)] + y = coords[permute.index(1)] + z = coords[permute.index(2)] + return torch.stack([x, y, z], dim=-1) diff --git a/o-voxel/build/temp.win-amd64-cpython-311/Release/.ninja_deps b/o-voxel/build/temp.win-amd64-cpython-311/Release/.ninja_deps new file mode 100644 index 0000000000000000000000000000000000000000..e95a189815280ed79c26330ecb1c51fd0e5306c1 --- /dev/null +++ b/o-voxel/build/temp.win-amd64-cpython-311/Release/.ninja_deps @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7fadf53ce27c1cc064c4c4e7d4acdca36f011728eb7a355d2fe876d06d11ef89 +size 1473980 diff --git a/o-voxel/build/temp.win-amd64-cpython-311/Release/.ninja_log b/o-voxel/build/temp.win-amd64-cpython-311/Release/.ninja_log new file mode 100644 index 0000000000000000000000000000000000000000..a2f12b1d6b585fcc5338da9ea8e3ed21f44315c1 --- /dev/null +++ b/o-voxel/build/temp.win-amd64-cpython-311/Release/.ninja_log @@ -0,0 +1,12 @@ +# ninja log v7 +43 5089 7920314585696679 C:/Users/opsiclear/Desktop/projects/Trellis2_multi_image_conditioning/o-voxel/build/temp.win-amd64-cpython-311/Release/src/serialize/z_order.obj aba9bdfd7758963 +40 5094 7920314585696679 C:/Users/opsiclear/Desktop/projects/Trellis2_multi_image_conditioning/o-voxel/build/temp.win-amd64-cpython-311/Release/src/serialize/hilbert.obj 96320bdff7b77437 +30 12370 7920314585499614 
C:/Users/opsiclear/Desktop/projects/Trellis2_multi_image_conditioning/o-voxel/build/temp.win-amd64-cpython-311/Release/src/io/svo.obj 2237e66b874990a +23 12418 7920314585499614 C:/Users/opsiclear/Desktop/projects/Trellis2_multi_image_conditioning/o-voxel/build/temp.win-amd64-cpython-311/Release/src/io/filter_neighbor.obj 41021b78b504c47e +26 12470 7920314585499614 C:/Users/opsiclear/Desktop/projects/Trellis2_multi_image_conditioning/o-voxel/build/temp.win-amd64-cpython-311/Release/src/io/filter_parent.obj 471c5c41ea624cff +13 13565 7920314585421492 C:/Users/opsiclear/Desktop/projects/Trellis2_multi_image_conditioning/o-voxel/build/temp.win-amd64-cpython-311/Release/src/convert/volumetic_attr.obj a880e2e3fea2c1dc +16 14155 7920314585421492 C:/Users/opsiclear/Desktop/projects/Trellis2_multi_image_conditioning/o-voxel/build/temp.win-amd64-cpython-311/Release/src/ext.obj c49c64d83f84cba7 +9 22492 7920314585385751 C:/Users/opsiclear/Desktop/projects/Trellis2_multi_image_conditioning/o-voxel/build/temp.win-amd64-cpython-311/Release/src/convert/flexible_dual_grid.obj 387210cbde44cf56 +36 39184 7920314585658621 C:/Users/opsiclear/Desktop/projects/Trellis2_multi_image_conditioning/o-voxel/build/temp.win-amd64-cpython-311/Release/src/serialize/api.obj 9d1bef8355fab5c1 +19 39211 7920314585489571 C:/Users/opsiclear/Desktop/projects/Trellis2_multi_image_conditioning/o-voxel/build/temp.win-amd64-cpython-311/Release/src/hash/hash.obj ca81a4c30cd1e199 +33 40641 7920314585629483 C:/Users/opsiclear/Desktop/projects/Trellis2_multi_image_conditioning/o-voxel/build/temp.win-amd64-cpython-311/Release/src/rasterize/rasterize.obj cacdf260d45d5cc diff --git a/o-voxel/build/temp.win-amd64-cpython-311/Release/build.ninja b/o-voxel/build/temp.win-amd64-cpython-311/Release/build.ninja new file mode 100644 index 0000000000000000000000000000000000000000..5071ecdfa30b3a456e33aa1aeee76643a603bcfc --- /dev/null +++ b/o-voxel/build/temp.win-amd64-cpython-311/Release/build.ninja @@ -0,0 +1,46 @@ 
+ninja_required_version = 1.3 +cxx = cl +nvcc = C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0\bin\nvcc + +cflags = /nologo /O2 /W3 /GL /DNDEBUG /MD -IC:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\third_party/eigen -IC:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\.venv\Lib\site-packages\torch\include -IC:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\.venv\Lib\site-packages\torch\include\torch\csrc\api\include "-IC:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0\include" -IC:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\.venv\include -IC:\Users\opsiclear\AppData\Roaming\uv\python\cpython-3.11.13-windows-x86_64-none\include -IC:\Users\opsiclear\AppData\Roaming\uv\python\cpython-3.11.13-windows-x86_64-none\Include "-IC:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\MSVC\14.44.35207\include" "-IC:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\MSVC\14.44.35207\ATLMFC\include" "-IC:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\VS\include" "-IC:\Program Files (x86)\Windows Kits\10\include\10.0.26100.0\ucrt" "-IC:\Program Files (x86)\Windows Kits\10\\include\10.0.26100.0\\um" "-IC:\Program Files (x86)\Windows Kits\10\\include\10.0.26100.0\\shared" "-IC:\Program Files (x86)\Windows Kits\10\\include\10.0.26100.0\\winrt" "-IC:\Program Files (x86)\Windows Kits\10\\include\10.0.26100.0\\cppwinrt" "-IC:\Program Files (x86)\Windows Kits\NETFXSDK\4.8\include\um" /MD /wd4819 /wd4251 /wd4244 /wd4267 /wd4275 /wd4018 /wd4190 /wd4624 /wd4067 /wd4068 /EHsc +post_cflags = /O2 /std:c++20 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=_C +cuda_cflags = -std=c++17 -Xcompiler /MD -Xcompiler /wd4819 -Xcompiler /wd4251 -Xcompiler /wd4244 -Xcompiler /wd4267 -Xcompiler /wd4275 -Xcompiler /wd4018 -Xcompiler /wd4190 -Xcompiler /wd4624 -Xcompiler /wd4067 -Xcompiler /wd4068 -Xcompiler /EHsc --use-local-env -Xcudafe 
--diag_suppress=base_class_has_different_dll_interface -Xcudafe --diag_suppress=field_without_dll_interface -Xcudafe --diag_suppress=dll_interface_conflict_none_assumed -Xcudafe --diag_suppress=dll_interface_conflict_dllexport_assumed -IC:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\third_party/eigen -IC:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\.venv\Lib\site-packages\torch\include -IC:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\.venv\Lib\site-packages\torch\include\torch\csrc\api\include "-IC:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0\include" -IC:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\.venv\include -IC:\Users\opsiclear\AppData\Roaming\uv\python\cpython-3.11.13-windows-x86_64-none\include -IC:\Users\opsiclear\AppData\Roaming\uv\python\cpython-3.11.13-windows-x86_64-none\Include "-IC:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\MSVC\14.44.35207\include" "-IC:\Program Files\Microsoft Visual Studio\2022\Community\VC\Tools\MSVC\14.44.35207\ATLMFC\include" "-IC:\Program Files\Microsoft Visual Studio\2022\Community\VC\Auxiliary\VS\include" "-IC:\Program Files (x86)\Windows Kits\10\include\10.0.26100.0\ucrt" "-IC:\Program Files (x86)\Windows Kits\10\\include\10.0.26100.0\\um" "-IC:\Program Files (x86)\Windows Kits\10\\include\10.0.26100.0\\shared" "-IC:\Program Files (x86)\Windows Kits\10\\include\10.0.26100.0\\winrt" "-IC:\Program Files (x86)\Windows Kits\10\\include\10.0.26100.0\\cppwinrt" "-IC:\Program Files (x86)\Windows Kits\NETFXSDK\4.8\include\um" +cuda_post_cflags = -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -O3 -std=c++20 -DTORCH_API_INCLUDE_EXTENSION_H -DTORCH_EXTENSION_NAME=_C -gencode=arch=compute_120,code=compute_120 -gencode=arch=compute_120,code=sm_120 +cuda_dlink_post_cflags = +sycl_dlink_post_cflags = +ldflags 
= + +rule compile + command = cl /showIncludes $cflags -c $in /Fo$out $post_cflags + deps = msvc + +rule cuda_compile + depfile = $out.d + deps = gcc + command = $nvcc --generate-dependencies-with-compile --dependency-output $out.d $cuda_cflags -c $in -o $out $cuda_post_cflags + + + + + + + +build C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\build\temp.win-amd64-cpython-311\Release\src/convert/flexible_dual_grid.obj: compile C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\src\convert\flexible_dual_grid.cpp +build C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\build\temp.win-amd64-cpython-311\Release\src/convert/volumetic_attr.obj: compile C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\src\convert\volumetic_attr.cpp +build C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\build\temp.win-amd64-cpython-311\Release\src/ext.obj: compile C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\src\ext.cpp +build C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\build\temp.win-amd64-cpython-311\Release\src/hash/hash.obj: cuda_compile C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\src\hash\hash.cu +build C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\build\temp.win-amd64-cpython-311\Release\src/io/filter_neighbor.obj: compile C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\src\io\filter_neighbor.cpp +build C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\build\temp.win-amd64-cpython-311\Release\src/io/filter_parent.obj: compile C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\src\io\filter_parent.cpp +build 
C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\build\temp.win-amd64-cpython-311\Release\src/io/svo.obj: compile C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\src\io\svo.cpp +build C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\build\temp.win-amd64-cpython-311\Release\src/rasterize/rasterize.obj: cuda_compile C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\src\rasterize\rasterize.cu +build C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\build\temp.win-amd64-cpython-311\Release\src/serialize/api.obj: cuda_compile C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\src\serialize\api.cu +build C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\build\temp.win-amd64-cpython-311\Release\src/serialize/hilbert.obj: cuda_compile C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\src\serialize\hilbert.cu +build C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\build\temp.win-amd64-cpython-311\Release\src/serialize/z_order.obj: cuda_compile C$:\Users\opsiclear\Desktop\projects\Trellis2_multi_image_conditioning\o-voxel\src\serialize\z_order.cu + + + + + + + + diff --git a/o-voxel/build/temp.win-amd64-cpython-311/Release/src/convert/_C.cp311-win_amd64.exp b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/convert/_C.cp311-win_amd64.exp new file mode 100644 index 0000000000000000000000000000000000000000..9c983c8e043c7b99a165862f0ecaecc2849b3c00 Binary files /dev/null and b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/convert/_C.cp311-win_amd64.exp differ diff --git a/o-voxel/build/temp.win-amd64-cpython-311/Release/src/convert/_C.cp311-win_amd64.lib b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/convert/_C.cp311-win_amd64.lib new file mode 100644 index 
0000000000000000000000000000000000000000..8518a45a550e692b8711112632f78b1d556035c0 Binary files /dev/null and b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/convert/_C.cp311-win_amd64.lib differ diff --git a/o-voxel/build/temp.win-amd64-cpython-311/Release/src/convert/flexible_dual_grid.obj b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/convert/flexible_dual_grid.obj new file mode 100644 index 0000000000000000000000000000000000000000..dcb19b420a3816f33a96453450eb68adf91393e1 --- /dev/null +++ b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/convert/flexible_dual_grid.obj @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:199b03ff7c85fe0c41817df7fb0ac4b69ba5fced59da8298955daaad564dddd2 +size 101177043 diff --git a/o-voxel/build/temp.win-amd64-cpython-311/Release/src/convert/volumetic_attr.obj b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/convert/volumetic_attr.obj new file mode 100644 index 0000000000000000000000000000000000000000..04968542e9280b5401f017cac60b4d8c329ffb9e --- /dev/null +++ b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/convert/volumetic_attr.obj @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:881ae0ab39efe22c1cadae7db8483ce3c076f66b6654a0d16d333e86540e3c84 +size 54681553 diff --git a/o-voxel/build/temp.win-amd64-cpython-311/Release/src/ext.obj b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/ext.obj new file mode 100644 index 0000000000000000000000000000000000000000..4473492976c2302474a20944fbf316de928bf4b3 --- /dev/null +++ b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/ext.obj @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25482cf27a40cae07b0d0cce67e03d390dafbb1a53bc668c0bd4ad03fc8a41c7 +size 60112845 diff --git a/o-voxel/build/temp.win-amd64-cpython-311/Release/src/hash/hash.obj b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/hash/hash.obj new file mode 100644 index 
0000000000000000000000000000000000000000..9d4c684a17f549a5b60c4932894ae8678c10a067 --- /dev/null +++ b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/hash/hash.obj @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e7aad51236896fdf39da8586753443ad460ba3827580120f0fa7e36a79c9fc2 +size 3310522 diff --git a/o-voxel/build/temp.win-amd64-cpython-311/Release/src/io/filter_neighbor.obj b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/io/filter_neighbor.obj new file mode 100644 index 0000000000000000000000000000000000000000..45798a7df4d8ada4cd34760c199eaf975065b0ab --- /dev/null +++ b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/io/filter_neighbor.obj @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d1b9b3e1de6293d8a846594b4666662c7713c8656988343ec237760a23a8184 +size 49303588 diff --git a/o-voxel/build/temp.win-amd64-cpython-311/Release/src/io/filter_parent.obj b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/io/filter_parent.obj new file mode 100644 index 0000000000000000000000000000000000000000..582550203b2c7c120ae5d927e66f1bac72d81a13 --- /dev/null +++ b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/io/filter_parent.obj @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:be06f0928225aaa96f784d6c54e43cd248fc9077b3ff3761bab35c78188a5f91 +size 49318336 diff --git a/o-voxel/build/temp.win-amd64-cpython-311/Release/src/io/svo.obj b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/io/svo.obj new file mode 100644 index 0000000000000000000000000000000000000000..a1cebf153e29124b256e2403b1b12c9bcb92b30d --- /dev/null +++ b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/io/svo.obj @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84252f221ffdca2a21050f669078f59080986b7a623fbd4d4f25358c35c9e125 +size 49340082 diff --git a/o-voxel/build/temp.win-amd64-cpython-311/Release/src/rasterize/rasterize.obj 
b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/rasterize/rasterize.obj new file mode 100644 index 0000000000000000000000000000000000000000..4084ff387308dfdd654446c82329e703402cbd7c --- /dev/null +++ b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/rasterize/rasterize.obj @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c5b1d508b498de72395b0aee27f4d8db3f53463c5a3c5ec5f0f30b8652c5c3f +size 3082508 diff --git a/o-voxel/build/temp.win-amd64-cpython-311/Release/src/serialize/api.obj b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/serialize/api.obj new file mode 100644 index 0000000000000000000000000000000000000000..80494f891b26de4b89aa0b09b640083e05d956a0 --- /dev/null +++ b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/serialize/api.obj @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b1750282d12d2693bda53c40f7ff1996e502116ddc5b49d9f73459120133302 +size 3021052 diff --git a/o-voxel/build/temp.win-amd64-cpython-311/Release/src/serialize/hilbert.obj b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/serialize/hilbert.obj new file mode 100644 index 0000000000000000000000000000000000000000..6a5dd068915ad8936e1cfc077c1ae9463fa9f19a Binary files /dev/null and b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/serialize/hilbert.obj differ diff --git a/o-voxel/build/temp.win-amd64-cpython-311/Release/src/serialize/z_order.obj b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/serialize/z_order.obj new file mode 100644 index 0000000000000000000000000000000000000000..177c37c2a4b00469e6d77a25c09788e7ab06492f Binary files /dev/null and b/o-voxel/build/temp.win-amd64-cpython-311/Release/src/serialize/z_order.obj differ diff --git a/o-voxel/examples/mesh2ovox.py b/o-voxel/examples/mesh2ovox.py new file mode 100644 index 0000000000000000000000000000000000000000..12fa2c9d7540ef74e0afcd4e0a8258b191d2fc90 --- /dev/null +++ b/o-voxel/examples/mesh2ovox.py @@ -0,0 +1,57 @@ +import torch +import o_voxel 
+import utils + +RES = 512 + +asset = utils.get_helmet() + +# 0. Normalize asset to unit cube +aabb = asset.bounding_box.bounds +center = (aabb[0] + aabb[1]) / 2 +scale = 0.99999 / (aabb[1] - aabb[0]).max() # To avoid numerical issues +asset.apply_translation(-center) +asset.apply_scale(scale) + +# 1. Geometry Voxelization (Flexible Dual Grid) +# Returns: occupied indices, dual vertices (QEF solution), and edge intersected +mesh = asset.to_mesh() +vertices = torch.from_numpy(mesh.vertices).float() +faces = torch.from_numpy(mesh.faces).long() +voxel_indices, dual_vertices, intersected = o_voxel.convert.mesh_to_flexible_dual_grid( + vertices, faces, + grid_size=RES, # Resolution + aabb=[[-0.5,-0.5,-0.5],[0.5,0.5,0.5]], # Axis-aligned bounding box + face_weight=1.0, # Face term weight in QEF + boundary_weight=0.2, # Boundary term weight in QEF + regularization_weight=1e-2, # Regularization term weight in QEF + timing=True +) +## sort to ensure align between geometry and material voxelization +vid = o_voxel.serialize.encode_seq(voxel_indices) +mapping = torch.argsort(vid) +voxel_indices = voxel_indices[mapping] +dual_vertices = dual_vertices[mapping] +intersected = intersected[mapping] + +# 2. Material Voxelization (Volumetric Attributes) +# Returns: dict containing 'base_color', 'metallic', 'roughness', etc. 
+voxel_indices_mat, attributes = o_voxel.convert.textured_mesh_to_volumetric_attr( + asset, + grid_size=RES, + aabb=[[-0.5,-0.5,-0.5],[0.5,0.5,0.5]], + timing=True +) +## sort to ensure align between geometry and material voxelization +vid_mat = o_voxel.serialize.encode_seq(voxel_indices_mat) +mapping_mat = torch.argsort(vid_mat) +attributes = {k: v[mapping_mat] for k, v in attributes.items()} + +# Save to compressed .vxz format +## packing +dual_vertices = dual_vertices * RES - voxel_indices +dual_vertices = (torch.clamp(dual_vertices, 0, 1) * 255).type(torch.uint8) +intersected = (intersected[:, 0:1] + 2 * intersected[:, 1:2] + 4 * intersected[:, 2:3]).type(torch.uint8) +attributes['dual_vertices'] = dual_vertices +attributes['intersected'] = intersected +o_voxel.io.write("ovoxel_helmet.vxz", voxel_indices, attributes) \ No newline at end of file diff --git a/o-voxel/examples/ovox2glb.py b/o-voxel/examples/ovox2glb.py new file mode 100644 index 0000000000000000000000000000000000000000..9ffb654cd4f9e0138c5d38b9fafaa72608bcc2d8 --- /dev/null +++ b/o-voxel/examples/ovox2glb.py @@ -0,0 +1,52 @@ +import torch +import o_voxel + +RES = 512 + +# Load data +coords, data = o_voxel.io.read("ovoxel_helmet.vxz") +dual_vertices = data['dual_vertices'] +intersected = data['intersected'] +base_color = data['base_color'] +metallic = data['metallic'] +roughness = data['roughness'] +alpha = data['alpha'] + +# Depack +dual_vertices = dual_vertices / 255 +intersected = torch.cat([ + intersected % 2, + intersected // 2 % 2, + intersected // 4 % 2, +], dim=-1).bool() + +# Extract Mesh +# O-Voxel connects dual vertices to form quads, optionally splitting them +# based on geometric features. 
+rec_verts, rec_faces = o_voxel.convert.flexible_dual_grid_to_mesh( + coords.cuda(), + dual_vertices.cuda(), + intersected.cuda(), + split_weight=None, # Auto-split based on min angle if None + grid_size=RES, + aabb=[[-0.5,-0.5,-0.5],[0.5,0.5,0.5]], +) + +# Post-process +attr_volume = torch.cat([base_color.cuda(), metallic.cuda(), roughness.cuda(), alpha.cuda()], dim=-1) / 255 +attr_layout = {'base_color': slice(0,3), 'metallic': slice(3,4), 'roughness': slice(4,5), 'alpha': slice(5,6)} +mesh = o_voxel.postprocess.to_glb( + vertices=rec_verts, + faces=rec_faces, + attr_volume=attr_volume, + coords=coords.cuda(), + attr_layout=attr_layout, + grid_size=RES, + aabb=[[-0.5,-0.5,-0.5],[0.5,0.5,0.5]], + decimation_target=100000, + texture_size=2048, + verbose=True, +) + +# Save as glb +mesh.export("rec_helmet.glb") diff --git a/o-voxel/examples/ovox2mesh.py b/o-voxel/examples/ovox2mesh.py new file mode 100644 index 0000000000000000000000000000000000000000..5644cd82b7ec71c590ef1ccb0cb04d967c378c61 --- /dev/null +++ b/o-voxel/examples/ovox2mesh.py @@ -0,0 +1,45 @@ +import torch +import o_voxel +import trimesh +import trimesh.visual + +RES = 512 + +# Load data +coords, data = o_voxel.io.read("ovoxel_helmet.vxz") +dual_vertices = data['dual_vertices'] +intersected = data['intersected'] +base_color = data['base_color'] +metallic = data['metallic'] +roughness = data['roughness'] +alpha = data['alpha'] + +# Depack +dual_vertices = dual_vertices / 255 +intersected = torch.cat([ + intersected % 2, + intersected // 2 % 2, + intersected // 4 % 2, +], dim=-1).bool() + +# Extract Mesh +# O-Voxel connects dual vertices to form quads, optionally splitting them +# based on geometric features. 
+rec_verts, rec_faces = o_voxel.convert.flexible_dual_grid_to_mesh( + coords.cuda(), + dual_vertices.cuda(), + intersected.cuda(), + split_weight=None, # Auto-split based on min angle if None + grid_size=RES, + aabb=[[-0.5,-0.5,-0.5],[0.5,0.5,0.5]], +) + +# Save as ply +visual = trimesh.visual.ColorVisuals( + vertex_colors=base_color, +) +mesh = trimesh.Trimesh( + vertices=rec_verts.cpu(), faces=rec_faces.cpu(), visual=visual, + process=False +) +mesh.export("rec_helmet.ply") diff --git a/o-voxel/examples/render_ovox.py b/o-voxel/examples/render_ovox.py new file mode 100644 index 0000000000000000000000000000000000000000..09d49f63e0b7fd957bad7c3407d464cc1d0f2eef --- /dev/null +++ b/o-voxel/examples/render_ovox.py @@ -0,0 +1,39 @@ +import torch +import numpy as np +import imageio +import o_voxel +import utils3d + +RES = 512 + +# Load data +coords, data = o_voxel.io.read("ovoxel_helmet.vxz") +position = (coords / RES - 0.5).cuda() +base_color = (data['base_color'] / 255).cuda() + +# Setup camera +extr = utils3d.extrinsics_look_at( + eye=torch.tensor([1.2, 0.5, 1.2]), + look_at=torch.tensor([0.0, 0.0, 0.0]), + up=torch.tensor([0.0, 1.0, 0.0]) +).cuda() +intr = utils3d.intrinsics_from_fov_xy( + fov_x=torch.deg2rad(torch.tensor(45.0)), + fov_y=torch.deg2rad(torch.tensor(45.0)), +).cuda() + +# Render +renderer = o_voxel.rasterize.VoxelRenderer( + rendering_options={"resolution": 512, "ssaa": 2} +) +output = renderer.render( + position=position, # Voxel centers + attrs=base_color, # Color/Opacity etc. 
+ voxel_size=1.0/RES, + extrinsics=extr, + intrinsics=intr +) +image = np.clip( + output.attr.permute(1, 2, 0).cpu().numpy() * 255, 0, 255 +).astype(np.uint8) +imageio.imwrite("ovoxel_helmet_visualization.png", image) diff --git a/o-voxel/examples/utils.py b/o-voxel/examples/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..75750e4ffc4b98fa1a5f6d6f2e2e62e8cc094844 --- /dev/null +++ b/o-voxel/examples/utils.py @@ -0,0 +1,27 @@ +import os +import requests +import tarfile +import trimesh + +HELMET_URL = "https://raw.githubusercontent.com/KhronosGroup/glTF-Sample-Models/refs/heads/main/2.0/DamagedHelmet/glTF-Binary/DamagedHelmet.glb" +CACHE_DIR = os.path.join(os.path.abspath(os.path.dirname(__file__)), "cache") + + +def download_file(url, path): + print(f"Downloading from {url} ...") + resp = requests.get(url, stream=True) + resp.raise_for_status() + + with open(path, "wb") as f: + for chunk in resp.iter_content(chunk_size=8192): + f.write(chunk) + + print(f"Saved to {path}") + + +def get_helmet() -> trimesh.Trimesh: + HELMET_PATH = os.path.join(CACHE_DIR, "helmet.glb") + if not os.path.exists(HELMET_PATH): + os.makedirs(CACHE_DIR, exist_ok=True) + download_file(HELMET_URL, HELMET_PATH) + return trimesh.load(HELMET_PATH) diff --git a/o-voxel/o_voxel.egg-info/PKG-INFO b/o-voxel/o_voxel.egg-info/PKG-INFO new file mode 100644 index 0000000000000000000000000000000000000000..2fb624924f60112ee8cfcdbab689e54f635fa5a2 --- /dev/null +++ b/o-voxel/o_voxel.egg-info/PKG-INFO @@ -0,0 +1,15 @@ +Metadata-Version: 2.1 +Name: o_voxel +Version: 0.0.1 +Summary: All about voxel. 
+Author-email: Jianfeng Xiang +Requires-Python: >=3.8 +Requires-Dist: torch +Requires-Dist: numpy +Requires-Dist: plyfile +Requires-Dist: trimesh +Requires-Dist: tqdm +Requires-Dist: zstandard +Requires-Dist: easydict +Requires-Dist: cumesh@ git+https://github.com/JeffreyXiang/CuMesh.git +Requires-Dist: flex_gemm@ git+https://github.com/JeffreyXiang/FlexGEMM.git diff --git a/o-voxel/o_voxel.egg-info/SOURCES.txt b/o-voxel/o_voxel.egg-info/SOURCES.txt new file mode 100644 index 0000000000000000000000000000000000000000..9e14690d1a030c99ef6ec2c48e6ac111251c0d77 --- /dev/null +++ b/o-voxel/o_voxel.egg-info/SOURCES.txt @@ -0,0 +1,30 @@ +README.md +pyproject.toml +setup.py +o_voxel/__init__.py +o_voxel/postprocess.py +o_voxel/rasterize.py +o_voxel/serialize.py +o_voxel.egg-info/PKG-INFO +o_voxel.egg-info/SOURCES.txt +o_voxel.egg-info/dependency_links.txt +o_voxel.egg-info/requires.txt +o_voxel.egg-info/top_level.txt +o_voxel/convert/__init__.py +o_voxel/convert/flexible_dual_grid.py +o_voxel/convert/volumetic_attr.py +o_voxel/io/__init__.py +o_voxel/io/npz.py +o_voxel/io/ply.py +o_voxel/io/vxz.py +src/ext.cpp +src/convert/flexible_dual_grid.cpp +src/convert/volumetic_attr.cpp +src/hash/hash.cu +src/io/filter_neighbor.cpp +src/io/filter_parent.cpp +src/io/svo.cpp +src/rasterize/rasterize.cu +src/serialize/api.cu +src/serialize/hilbert.cu +src/serialize/z_order.cu \ No newline at end of file diff --git a/o-voxel/o_voxel.egg-info/dependency_links.txt b/o-voxel/o_voxel.egg-info/dependency_links.txt new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/o-voxel/o_voxel.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/o-voxel/o_voxel.egg-info/requires.txt b/o-voxel/o_voxel.egg-info/requires.txt new file mode 100644 index 0000000000000000000000000000000000000000..9a6d507faebf064501723f6beefb1c9da4c9ce12 --- /dev/null +++ b/o-voxel/o_voxel.egg-info/requires.txt @@ -0,0 +1,9 @@ +torch +numpy 
+plyfile +trimesh +tqdm +zstandard +easydict +cumesh@ git+https://github.com/JeffreyXiang/CuMesh.git +flex_gemm@ git+https://github.com/JeffreyXiang/FlexGEMM.git diff --git a/o-voxel/o_voxel.egg-info/top_level.txt b/o-voxel/o_voxel.egg-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..867781a47f7be9716aac3839808851f0f8495384 --- /dev/null +++ b/o-voxel/o_voxel.egg-info/top_level.txt @@ -0,0 +1 @@ +o_voxel diff --git a/o-voxel/o_voxel/__init__.py b/o-voxel/o_voxel/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..55ffd986b47551517fbf1d1538b40f77ec5ee8f8 --- /dev/null +++ b/o-voxel/o_voxel/__init__.py @@ -0,0 +1,7 @@ +from . import ( + convert, + io, + postprocess, + rasterize, + serialize +) \ No newline at end of file diff --git a/o-voxel/o_voxel/convert/__init__.py b/o-voxel/o_voxel/convert/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..25755f9b06a2cf37856f3be043e3928a5e23510c --- /dev/null +++ b/o-voxel/o_voxel/convert/__init__.py @@ -0,0 +1,2 @@ +from .flexible_dual_grid import * +from .volumetic_attr import * \ No newline at end of file diff --git a/o-voxel/o_voxel/convert/flexible_dual_grid.py b/o-voxel/o_voxel/convert/flexible_dual_grid.py new file mode 100644 index 0000000000000000000000000000000000000000..51b8b0552fe697e95f7496a370a56c538c8abd10 --- /dev/null +++ b/o-voxel/o_voxel/convert/flexible_dual_grid.py @@ -0,0 +1,283 @@ +from typing import * +import numpy as np +import torch +from .. import _C + +__all__ = [ + "mesh_to_flexible_dual_grid", + "flexible_dual_grid_to_mesh", +] + + +def _init_hashmap(grid_size, capacity, device): + VOL = (grid_size[0] * grid_size[1] * grid_size[2]).item() + + # If the number of elements in the tensor is less than 2^32, use uint32 as the hashmap type, otherwise use uint64. 
+ if VOL < 2**32: + hashmap_keys = torch.full((capacity,), torch.iinfo(torch.uint32).max, dtype=torch.uint32, device=device) + elif VOL < 2**64: + hashmap_keys = torch.full((capacity,), torch.iinfo(torch.uint64).max, dtype=torch.uint64, device=device) + else: + raise ValueError(f"The spatial size is too large to fit in a hashmap. Get volumn {VOL} > 2^64.") + + hashmap_vals = torch.empty((capacity,), dtype=torch.uint32, device=device) + + return hashmap_keys, hashmap_vals + + +@torch.no_grad() +def mesh_to_flexible_dual_grid( + vertices: torch.Tensor, + faces: torch.Tensor, + voxel_size: Union[float, list, tuple, np.ndarray, torch.Tensor] = None, + grid_size: Union[int, list, tuple, np.ndarray, torch.Tensor] = None, + aabb: Union[list, tuple, np.ndarray, torch.Tensor] = None, + face_weight: float = 1.0, + boundary_weight: float = 1.0, + regularization_weight: float = 0.1, + timing: bool = False, +) -> Union[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Voxelize a mesh into a sparse voxel grid. + + Args: + vertices (torch.Tensor): The vertices of the mesh. + faces (torch.Tensor): The faces of the mesh. + voxel_size (float, list, tuple, np.ndarray, torch.Tensor): The size of each voxel. + grid_size (int, list, tuple, np.ndarray, torch.Tensor): The size of the grid. + NOTE: One of voxel_size and grid_size must be provided. + aabb (list, tuple, np.ndarray, torch.Tensor): The axis-aligned bounding box of the mesh. + If not provided, it will be computed automatically. + face_weight (float): The weight of the face term in the QEF when solving the dual vertices. + boundary_weight (float): The weight of the boundary term in the QEF when solving the dual vertices. + regularization_weight (float): The weight of the regularization term in the QEF when solving the dual vertices. + timing (bool): Whether to time the voxelization process. + + Returns: + torch.Tensor: The indices of the voxels that are occupied by the mesh. 
+ The shape of the tensor is (N, 3), where N is the number of occupied voxels. + torch.Tensor: The dual vertices of the mesh. + torch.Tensor: The intersected flag of each voxel. + """ + + # Load mesh + vertices = vertices.float() + faces = faces.int() + + # Voxelize settings + assert voxel_size is not None or grid_size is not None, "Either voxel_size or grid_size must be provided" + + if voxel_size is not None: + if isinstance(voxel_size, float): + voxel_size = [voxel_size, voxel_size, voxel_size] + if isinstance(voxel_size, (list, tuple)): + voxel_size = np.array(voxel_size) + if isinstance(voxel_size, np.ndarray): + voxel_size = torch.tensor(voxel_size, dtype=torch.float32) + assert isinstance(voxel_size, torch.Tensor), f"voxel_size must be a float, list, tuple, np.ndarray, or torch.Tensor, but got {type(voxel_size)}" + assert voxel_size.dim() == 1, f"voxel_size must be a 1D tensor, but got {voxel_size.shape}" + assert voxel_size.size(0) == 3, f"voxel_size must have 3 elements, but got {voxel_size.size(0)}" + + if grid_size is not None: + if isinstance(grid_size, int): + grid_size = [grid_size, grid_size, grid_size] + if isinstance(grid_size, (list, tuple)): + grid_size = np.array(grid_size) + if isinstance(grid_size, np.ndarray): + grid_size = torch.tensor(grid_size, dtype=torch.int32) + assert isinstance(grid_size, torch.Tensor), f"grid_size must be an int, list, tuple, np.ndarray, or torch.Tensor, but got {type(grid_size)}" + assert grid_size.dim() == 1, f"grid_size must be a 1D tensor, but got {grid_size.shape}" + assert grid_size.size(0) == 3, f"grid_size must have 3 elements, but got {grid_size.size(0)}" + + if aabb is not None: + if isinstance(aabb, (list, tuple)): + aabb = np.array(aabb) + if isinstance(aabb, np.ndarray): + aabb = torch.tensor(aabb, dtype=torch.float32) + assert isinstance(aabb, torch.Tensor), f"aabb must be a list, tuple, np.ndarray, or torch.Tensor, but got {type(aabb)}" + assert aabb.dim() == 2, f"aabb must be a 2D tensor, but got 
{aabb.shape}" + assert aabb.size(0) == 2, f"aabb must have 2 rows, but got {aabb.size(0)}" + assert aabb.size(1) == 3, f"aabb must have 3 columns, but got {aabb.size(1)}" + + # Auto adjust aabb + if aabb is None: + min_xyz = vertices.min(dim=0).values + max_xyz = vertices.max(dim=0).values + + if voxel_size is not None: + padding = torch.ceil((max_xyz - min_xyz) / voxel_size) * voxel_size - (max_xyz - min_xyz) + min_xyz -= padding * 0.5 + max_xyz += padding * 0.5 + if grid_size is not None: + padding = (max_xyz - min_xyz) / (grid_size - 1) + min_xyz -= padding * 0.5 + max_xyz += padding * 0.5 + + aabb = torch.stack([min_xyz, max_xyz], dim=0).float().cuda() + + # Fill voxel size or grid size + if voxel_size is None: + voxel_size = (aabb[1] - aabb[0]) / grid_size + if grid_size is None: + grid_size = ((aabb[1] - aabb[0]) / voxel_size).round().int() + + # subdivide mesh + vertices = vertices - aabb[0].reshape(1, 3) + grid_range = torch.stack([torch.zeros_like(grid_size), grid_size], dim=0).int() + + ret = _C.mesh_to_flexible_dual_grid_cpu( + vertices, + faces, + voxel_size, + grid_range, + face_weight, + boundary_weight, + regularization_weight, + timing, + ) + + return ret + + +def flexible_dual_grid_to_mesh( + coords: torch.Tensor, + dual_vertices: torch.Tensor, + intersected_flag: torch.Tensor, + split_weight: Union[torch.Tensor, None], + aabb: Union[list, tuple, np.ndarray, torch.Tensor], + voxel_size: Union[float, list, tuple, np.ndarray, torch.Tensor] = None, + grid_size: Union[int, list, tuple, np.ndarray, torch.Tensor] = None, + train: bool = False, +): + """ + Extract mesh from sparse voxel structures using flexible dual grid. + + Args: + coords (torch.Tensor): The coordinates of the voxels. + dual_vertices (torch.Tensor): The dual vertices. + intersected_flag (torch.Tensor): The intersected flag. + split_weight (torch.Tensor): The split weight of each dual quad. If None, the algorithm + will split based on minimum angle. 
+ aabb (list, tuple, np.ndarray, torch.Tensor): The axis-aligned bounding box of the mesh. + voxel_size (float, list, tuple, np.ndarray, torch.Tensor): The size of each voxel. + grid_size (int, list, tuple, np.ndarray, torch.Tensor): The size of the grid. + NOTE: One of voxel_size and grid_size must be provided. + train (bool): Whether to use training mode. + + Returns: + vertices (torch.Tensor): The vertices of the mesh. + faces (torch.Tensor): The faces of the mesh. + """ + # Static variables + if not hasattr(flexible_dual_grid_to_mesh, "edge_neighbor_voxel_offset"): + flexible_dual_grid_to_mesh.edge_neighbor_voxel_offset = torch.tensor([ + [[0, 0, 0], [0, 0, 1], [0, 1, 1], [0, 1, 0]], # x-axis + [[0, 0, 0], [1, 0, 0], [1, 0, 1], [0, 0, 1]], # y-axis + [[0, 0, 0], [0, 1, 0], [1, 1, 0], [1, 0, 0]], # z-axis + ], dtype=torch.int, device=coords.device).unsqueeze(0) + if not hasattr(flexible_dual_grid_to_mesh, "quad_split_1"): + flexible_dual_grid_to_mesh.quad_split_1 = torch.tensor([0, 1, 2, 0, 2, 3], dtype=torch.long, device=coords.device, requires_grad=False) + if not hasattr(flexible_dual_grid_to_mesh, "quad_split_2"): + flexible_dual_grid_to_mesh.quad_split_2 = torch.tensor([0, 1, 3, 3, 1, 2], dtype=torch.long, device=coords.device, requires_grad=False) + if not hasattr(flexible_dual_grid_to_mesh, "quad_split_train"): + flexible_dual_grid_to_mesh.quad_split_train = torch.tensor([0, 1, 4, 1, 2, 4, 2, 3, 4, 3, 0, 4], dtype=torch.long, device=coords.device, requires_grad=False) + + # AABB + if isinstance(aabb, (list, tuple)): + aabb = np.array(aabb) + if isinstance(aabb, np.ndarray): + aabb = torch.tensor(aabb, dtype=torch.float32, device=coords.device) + assert isinstance(aabb, torch.Tensor), f"aabb must be a list, tuple, np.ndarray, or torch.Tensor, but got {type(aabb)}" + assert aabb.dim() == 2, f"aabb must be a 2D tensor, but got {aabb.shape}" + assert aabb.size(0) == 2, f"aabb must have 2 rows, but got {aabb.size(0)}" + assert aabb.size(1) == 3, f"aabb must 
have 3 columns, but got {aabb.size(1)}" + + # Voxel size + if voxel_size is not None: + if isinstance(voxel_size, float): + voxel_size = [voxel_size, voxel_size, voxel_size] + if isinstance(voxel_size, (list, tuple)): + voxel_size = np.array(voxel_size) + if isinstance(voxel_size, np.ndarray): + voxel_size = torch.tensor(voxel_size, dtype=torch.float32, device=coords.device) + grid_size = ((aabb[1] - aabb[0]) / voxel_size).round().int() + else: + assert grid_size is not None, "Either voxel_size or grid_size must be provided" + if isinstance(grid_size, int): + grid_size = [grid_size, grid_size, grid_size] + if isinstance(grid_size, (list, tuple)): + grid_size = np.array(grid_size) + if isinstance(grid_size, np.ndarray): + grid_size = torch.tensor(grid_size, dtype=torch.int32, device=coords.device) + voxel_size = (aabb[1] - aabb[0]) / grid_size + assert isinstance(voxel_size, torch.Tensor), f"voxel_size must be a float, list, tuple, np.ndarray, or torch.Tensor, but got {type(voxel_size)}" + assert voxel_size.dim() == 1, f"voxel_size must be a 1D tensor, but got {voxel_size.shape}" + assert voxel_size.size(0) == 3, f"voxel_size must have 3 elements, but got {voxel_size.size(0)}" + assert isinstance(grid_size, torch.Tensor), f"grid_size must be an int, list, tuple, np.ndarray, or torch.Tensor, but got {type(grid_size)}" + assert grid_size.dim() == 1, f"grid_size must be a 1D tensor, but got {grid_size.shape}" + assert grid_size.size(0) == 3, f"grid_size must have 3 elements, but got {grid_size.size(0)}" + + # Extract mesh + N = dual_vertices.shape[0] + mesh_vertices = (coords.float() + dual_vertices) / (2 * N) - 0.5 + + # Store active voxels into hashmap + hashmap = _init_hashmap(grid_size, 2 * N, device=coords.device) + _C.hashmap_insert_3d_idx_as_val_cuda(*hashmap, torch.cat([torch.zeros_like(coords[:, :1]), coords], dim=-1), *grid_size.tolist()) + + # Find connected voxels + edge_neighbor_voxel = coords.reshape(N, 1, 1, 3) + 
flexible_dual_grid_to_mesh.edge_neighbor_voxel_offset # (N, 3, 4, 3) + connected_voxel = edge_neighbor_voxel[intersected_flag] # (M, 4, 3) + M = connected_voxel.shape[0] + connected_voxel_hash_key = torch.cat([ + torch.zeros((M * 4, 1), dtype=torch.int, device=coords.device), + connected_voxel.reshape(-1, 3) + ], dim=1) + connected_voxel_indices = _C.hashmap_lookup_3d_cuda(*hashmap, connected_voxel_hash_key, *grid_size.tolist()).reshape(M, 4).int() + connected_voxel_valid = (connected_voxel_indices != 0xffffffff).all(dim=1) + quad_indices = connected_voxel_indices[connected_voxel_valid].int() # (L, 4) + L = quad_indices.shape[0] + + # Construct triangles + if not train: + mesh_vertices = (coords.float() + dual_vertices) * voxel_size + aabb[0].reshape(1, 3) + if split_weight is None: + # if split 1 + atempt_triangles_0 = quad_indices[:, flexible_dual_grid_to_mesh.quad_split_1] + normals0 = torch.cross(mesh_vertices[atempt_triangles_0[:, 1]] - mesh_vertices[atempt_triangles_0[:, 0]], mesh_vertices[atempt_triangles_0[:, 2]] - mesh_vertices[atempt_triangles_0[:, 0]]) + normals1 = torch.cross(mesh_vertices[atempt_triangles_0[:, 2]] - mesh_vertices[atempt_triangles_0[:, 1]], mesh_vertices[atempt_triangles_0[:, 3]] - mesh_vertices[atempt_triangles_0[:, 1]]) + align0 = (normals0 * normals1).sum(dim=1, keepdim=True).abs() + # if split 2 + atempt_triangles_1 = quad_indices[:, flexible_dual_grid_to_mesh.quad_split_2] + normals0 = torch.cross(mesh_vertices[atempt_triangles_1[:, 1]] - mesh_vertices[atempt_triangles_1[:, 0]], mesh_vertices[atempt_triangles_1[:, 2]] - mesh_vertices[atempt_triangles_1[:, 0]]) + normals1 = torch.cross(mesh_vertices[atempt_triangles_1[:, 2]] - mesh_vertices[atempt_triangles_1[:, 1]], mesh_vertices[atempt_triangles_1[:, 3]] - mesh_vertices[atempt_triangles_1[:, 1]]) + align1 = (normals0 * normals1).sum(dim=1, keepdim=True).abs() + # select split + mesh_triangles = torch.where(align0 > align1, atempt_triangles_0, atempt_triangles_1).reshape(-1, 3) + 
else: + split_weight_ws = split_weight[quad_indices] + split_weight_ws_02 = split_weight_ws[:, 0] * split_weight_ws[:, 2] + split_weight_ws_13 = split_weight_ws[:, 1] * split_weight_ws[:, 3] + mesh_triangles = torch.where( + split_weight_ws_02 > split_weight_ws_13, + quad_indices[:, flexible_dual_grid_to_mesh.quad_split_1], + quad_indices[:, flexible_dual_grid_to_mesh.quad_split_2] + ).reshape(-1, 3) + else: + assert split_weight is not None, "split_weight must be provided in training mode" + mesh_vertices = (coords.float() + dual_vertices) * voxel_size + aabb[0].reshape(1, 3) + quad_vs = mesh_vertices[quad_indices] + mean_v02 = (quad_vs[:, 0] + quad_vs[:, 2]) / 2 + mean_v13 = (quad_vs[:, 1] + quad_vs[:, 3]) / 2 + split_weight_ws = split_weight[quad_indices] + split_weight_ws_02 = split_weight_ws[:, 0] * split_weight_ws[:, 2] + split_weight_ws_13 = split_weight_ws[:, 1] * split_weight_ws[:, 3] + mid_vertices = ( + split_weight_ws_02 * mean_v02 + + split_weight_ws_13 * mean_v13 + ) / (split_weight_ws_02 + split_weight_ws_13) + mesh_vertices = torch.cat([mesh_vertices, mid_vertices], dim=0) + quad_indices = torch.cat([quad_indices, torch.arange(N, N + L, device='cuda').unsqueeze(1)], dim=1) + mesh_triangles = quad_indices[:, flexible_dual_grid_to_mesh.quad_split_train].reshape(-1, 3) + + return mesh_vertices, mesh_triangles diff --git a/o-voxel/o_voxel/convert/volumetic_attr.py b/o-voxel/o_voxel/convert/volumetic_attr.py new file mode 100644 index 0000000000000000000000000000000000000000..fe24bfe876f01cceb02bdb5859232fa95779b5c6 --- /dev/null +++ b/o-voxel/o_voxel/convert/volumetic_attr.py @@ -0,0 +1,583 @@ +from typing import * +import io +from PIL import Image +import torch +import numpy as np +from tqdm import tqdm +import trimesh +import trimesh.visual + +from .. 
# Integer codes used for material alpha modes.
ALPHA_MODE_ENUM = {
    "OPAQUE": 0,
    "MASK": 1,
    "BLEND": 2,
}


def is_power_of_two(n: int) -> bool:
    """Return True when ``n`` is a positive integer with exactly one bit set."""
    if n <= 0:
        return False
    # Clearing the lowest set bit leaves zero only for powers of two.
    return not (n & (n - 1))


def nearest_power_of_two(n: int) -> int:
    """
    Return the power of two closest to ``n``.

    Args:
        n (int): A positive integer.

    Returns:
        int: ``n`` itself if it is already a power of two, otherwise the closer
            of the two surrounding powers of two. Ties go to the larger one.

    Raises:
        ValueError: If ``n`` is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be >= 1")
    if is_power_of_two(n):
        return n
    below = 1 << (n.bit_length() - 1)
    above = below << 1
    return below if (n - below) < (above - n) else above
def textured_mesh_to_volumetric_attr(
    mesh: Union[trimesh.Scene, trimesh.Trimesh, str],
    voxel_size: Union[float, list, tuple, np.ndarray, torch.Tensor] = None,
    grid_size: Union[int, list, tuple, np.ndarray, torch.Tensor] = None,
    aabb: Union[list, tuple, np.ndarray, torch.Tensor] = None,
    mip_level_offset: float = 0.0,
    verbose: bool = False,
    timing: bool = False,
) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
    """
    Voxelize a mesh into a sparse voxel grid with PBR properties.

    Args:
        mesh (trimesh.Scene, trimesh.Trimesh, str): The input mesh.
            If a string is provided, it will be loaded as a mesh using trimesh.load().
        voxel_size (float, list, tuple, np.ndarray, torch.Tensor): The size of each voxel.
        grid_size (int, list, tuple, np.ndarray, torch.Tensor): The size of the grid.
            NOTE: One of voxel_size and grid_size must be provided.
        aabb (list, tuple, np.ndarray, torch.Tensor): The axis-aligned bounding box of the mesh,
            as a 2x3 array of [min_xyz, max_xyz].
            If not provided, it is computed from the scene bounds and padded.
        mip_level_offset (float): The mip level offset for texture mip level selection.
        verbose (bool): Whether to print the settings.
        timing (bool): Whether to print the timing information.

    Returns:
        torch.Tensor: The indices of the voxels that are occupied by the mesh.
        Dict[str, torch.Tensor]: A dictionary containing the following keys (all uint8):
            - "base_color": The base color of the occupied voxels.
            - "metallic": The metallic value of the occupied voxels.
            - "roughness": The roughness value of the occupied voxels.
            - "emissive": The emissive value of the occupied voxels.
            - "alpha": The alpha value of the occupied voxels.
            - "normal": The normal of the occupied voxels.

    Raises:
        TypeError: If ``mesh`` is neither a path, a trimesh.Scene nor a trimesh.Trimesh.
    """

    # Load mesh
    if isinstance(mesh, str):
        mesh = trimesh.load(mesh)
    if isinstance(mesh, trimesh.Scene):
        groups = mesh.dump()
    elif isinstance(mesh, trimesh.Trimesh):
        groups = [mesh]
    else:
        raise TypeError(f"mesh must be a str, trimesh.Scene, or trimesh.Trimesh, but got {type(mesh)}")
    scene = trimesh.Scene(groups)

    # Voxelize settings
    assert voxel_size is not None or grid_size is not None, "Either voxel_size or grid_size must be provided"

    if voxel_size is not None:
        if isinstance(voxel_size, (int, float)):
            voxel_size = [voxel_size, voxel_size, voxel_size]
        if isinstance(voxel_size, (list, tuple)):
            voxel_size = np.array(voxel_size)
        if isinstance(voxel_size, np.ndarray):
            voxel_size = torch.tensor(voxel_size, dtype=torch.float32)
        assert isinstance(voxel_size, torch.Tensor), f"voxel_size must be a float, list, tuple, np.ndarray, or torch.Tensor, but got {type(voxel_size)}"
        assert voxel_size.dim() == 1, f"voxel_size must be a 1D tensor, but got {voxel_size.shape}"
        assert voxel_size.size(0) == 3, f"voxel_size must have 3 elements, but got {voxel_size.size(0)}"

    if grid_size is not None:
        if isinstance(grid_size, int):
            grid_size = [grid_size, grid_size, grid_size]
        if isinstance(grid_size, (list, tuple)):
            grid_size = np.array(grid_size)
        if isinstance(grid_size, np.ndarray):
            grid_size = torch.tensor(grid_size, dtype=torch.int32)
        assert isinstance(grid_size, torch.Tensor), f"grid_size must be an int, list, tuple, np.ndarray, or torch.Tensor, but got {type(grid_size)}"
        assert grid_size.dim() == 1, f"grid_size must be a 1D tensor, but got {grid_size.shape}"
        assert grid_size.size(0) == 3, f"grid_size must have 3 elements, but got {grid_size.size(0)}"

    if aabb is not None:
        if isinstance(aabb, (list, tuple)):
            aabb = np.array(aabb)
        if isinstance(aabb, np.ndarray):
            aabb = torch.tensor(aabb, dtype=torch.float32)
        assert isinstance(aabb, torch.Tensor), f"aabb must be a list, tuple, np.ndarray, or torch.Tensor, but got {type(aabb)}"
        assert aabb.dim() == 2, f"aabb must be a 2D tensor, but got {aabb.shape}"
        assert aabb.size(0) == 2, f"aabb must have 2 rows, but got {aabb.size(0)}"
        assert aabb.size(1) == 3, f"aabb must have 3 columns, but got {aabb.size(1)}"

    # Auto adjust aabb
    if aabb is None:
        # Copy the scene bounds into a tensor first: the padding math and the
        # final torch.stack need tensors, and the copy keeps the scene's own
        # bounds array from being modified in place.
        bounds = torch.tensor(np.asarray(scene.bounds), dtype=torch.float32)
        min_xyz = bounds[0]
        max_xyz = bounds[1]

        if voxel_size is not None:
            # Grow the box symmetrically to a whole number of voxels per axis.
            extent = max_xyz - min_xyz
            padding = torch.ceil(extent / voxel_size) * voxel_size - extent
            min_xyz = min_xyz - padding * 0.5
            max_xyz = max_xyz + padding * 0.5
        if grid_size is not None:
            padding = (max_xyz - min_xyz) / (grid_size - 1)
            min_xyz = min_xyz - padding * 0.5
            max_xyz = max_xyz + padding * 0.5

        aabb = torch.stack([min_xyz, max_xyz], dim=0).float()

    # Fill voxel size or grid size
    if voxel_size is None:
        voxel_size = (aabb[1] - aabb[0]) / grid_size
    if grid_size is None:
        grid_size = ((aabb[1] - aabb[0]) / voxel_size).round().int()

    grid_range = torch.stack([torch.zeros_like(grid_size), grid_size], dim=0).int()

    # Print settings
    if verbose:
        print(f"Voxelize settings:")
        print(f" Voxel size: {voxel_size}")
        print(f" Grid size: {grid_size}")
        print(f" AABB: {aabb}")

    # Load Scene
    scene_buffers = {
        'triangles': [],
        'normals': [],
        'uvs': [],
        'material_ids': [],
        'base_color_factor': [],
        'base_color_texture': [],
        'metallic_factor': [],
        'metallic_texture': [],
        'roughness_factor': [],
        'roughness_texture': [],
        'emissive_factor': [],
        'emissive_texture': [],
        'alpha_mode': [],
        'alpha_cutoff': [],
        'alpha_factor': [],
        'alpha_texture': [],
        'normal_texture': [],
    }
    for sid, (name, g) in tqdm(enumerate(scene.geometry.items()), total=len(scene.geometry), desc="Loading Scene", disable=not verbose):
        # Validate before touching g.visual.material so unsupported geometry
        # fails with these messages rather than an AttributeError while printing.
        assert isinstance(g, trimesh.Trimesh), f"Only trimesh.Trimesh is supported, but got {type(g)}"
        assert isinstance(g.visual, trimesh.visual.TextureVisuals), f"Only trimesh.visual.TextureVisuals is supported, but got {type(g.visual)}"
        assert isinstance(g.visual.material, trimesh.visual.material.PBRMaterial), f"Only trimesh.visual.material.PBRMaterial is supported, but got {type(g.visual.material)}"
        material = g.visual.material

        if verbose:
            print(f"Geometry: {name}")
            print(f" Visual: {g.visual}")
            print(f" Triangles: {g.triangles.shape[0]}")
            print(f" Vertices: {g.vertices.shape[0]}")
            print(f" Normals: {g.vertex_normals.shape[0]}")
            if material.baseColorFactor is not None:
                print(f" Base color factor: {material.baseColorFactor}")
            if material.baseColorTexture is not None:
                print(f" Base color texture: {material.baseColorTexture.size} {material.baseColorTexture.mode}")
            if material.metallicFactor is not None:
                print(f" Metallic factor: {material.metallicFactor}")
            if material.roughnessFactor is not None:
                print(f" Roughness factor: {material.roughnessFactor}")
            if material.metallicRoughnessTexture is not None:
                print(f" Metallic roughness texture: {material.metallicRoughnessTexture.size} {material.metallicRoughnessTexture.mode}")
            if material.emissiveFactor is not None:
                print(f" Emissive factor: {material.emissiveFactor}")
            if material.emissiveTexture is not None:
                print(f" Emissive texture: {material.emissiveTexture.size} {material.emissiveTexture.mode}")
            if material.alphaMode is not None:
                print(f" Alpha mode: {material.alphaMode}")
            if material.alphaCutoff is not None:
                print(f" Alpha cutoff: {material.alphaCutoff}")
            if material.normalTexture is not None:
                print(f" Normal texture: {material.normalTexture.size} {material.normalTexture.mode}")

        # Geometry, expressed relative to the AABB minimum corner.
        triangles = torch.tensor(g.triangles, dtype=torch.float32) - aabb[0].reshape(1, 1, 3)  # [N, 3, 3]
        normals = torch.tensor(g.vertex_normals[g.faces], dtype=torch.float32)                 # [N, 3, 3]
        if g.visual.uv is not None:
            uvs = torch.tensor(g.visual.uv[g.faces], dtype=torch.float32)                      # [N, 3, 2]
        else:
            uvs = torch.zeros(g.triangles.shape[0], 3, 2, dtype=torch.float32)                 # [N, 3, 2]

        # Base color and alpha come from the same factor and the same texture.
        alphaMode = ALPHA_MODE_ENUM[material.alphaMode] if material.alphaMode in ALPHA_MODE_ENUM else 0
        alphaCutoff = material.alphaCutoff if material.alphaCutoff is not None else 0.5
        if material.baseColorFactor is not None:
            # NOTE(review): the factor is indexed at [3] below, so this tensor has
            # at least 4 entries while the fallback has 3 - confirm the extension
            # only reads the first three.
            baseColorFactor = torch.tensor(material.baseColorFactor / 255, dtype=torch.float32)
            alphaFactor = material.baseColorFactor[3] / 255
        else:
            baseColorFactor = torch.ones(3, dtype=torch.float32)                               # [3]
            alphaFactor = 1.0
        baseColorTexture = torch.tensor([])
        alphaTexture = torch.tensor([])
        if material.baseColorTexture is not None:
            rgba = np.array(material.baseColorTexture.convert('RGBA'))
            baseColorTexture = torch.tensor(rgba[..., :3], dtype=torch.uint8)                  # [H, W, 3]
            if alphaMode != 0:
                alphaTexture = torch.tensor(rgba[..., 3], dtype=torch.uint8)                   # [H, W]

        # Metallic is read from channel 2 and roughness from channel 1 of the packed texture.
        metallicFactor = material.metallicFactor if material.metallicFactor is not None else 1.0
        roughnessFactor = material.roughnessFactor if material.roughnessFactor is not None else 1.0
        if material.metallicRoughnessTexture is not None:
            packed = np.array(material.metallicRoughnessTexture.convert('RGB'))
            metallicTexture = torch.tensor(packed[..., 2], dtype=torch.uint8)                  # [H, W]
            roughnessTexture = torch.tensor(packed[..., 1], dtype=torch.uint8)                 # [H, W]
        else:
            metallicTexture = torch.tensor([])
            roughnessTexture = torch.tensor([])

        emissiveFactor = torch.tensor(material.emissiveFactor, dtype=torch.float32) if material.emissiveFactor is not None \
            else torch.zeros(3, dtype=torch.float32)                                           # [3]
        emissiveTexture = torch.tensor(np.array(material.emissiveTexture.convert('RGB'))[..., :3], dtype=torch.uint8) if material.emissiveTexture is not None \
            else torch.tensor([])                                                              # [H, W, 3]
        normalTexture = torch.tensor(np.array(material.normalTexture.convert('RGB'))[..., :3], dtype=torch.uint8) if material.normalTexture is not None \
            else torch.tensor([])                                                              # [H, W, 3]

        scene_buffers['triangles'].append(triangles)
        scene_buffers['normals'].append(normals)
        scene_buffers['uvs'].append(uvs)
        # Each geometry carries exactly one material, so its index doubles as the material id.
        scene_buffers['material_ids'].append(torch.full((triangles.shape[0],), sid, dtype=torch.int32))
        scene_buffers['base_color_factor'].append(baseColorFactor)
        scene_buffers['base_color_texture'].append(baseColorTexture)
        scene_buffers['metallic_factor'].append(metallicFactor)
        scene_buffers['metallic_texture'].append(metallicTexture)
        scene_buffers['roughness_factor'].append(roughnessFactor)
        scene_buffers['roughness_texture'].append(roughnessTexture)
        scene_buffers['emissive_factor'].append(emissiveFactor)
        scene_buffers['emissive_texture'].append(emissiveTexture)
        scene_buffers['alpha_mode'].append(alphaMode)
        scene_buffers['alpha_cutoff'].append(alphaCutoff)
        scene_buffers['alpha_factor'].append(alphaFactor)
        scene_buffers['alpha_texture'].append(alphaTexture)
        scene_buffers['normal_texture'].append(normalTexture)

    scene_buffers['triangles'] = torch.cat(scene_buffers['triangles'], dim=0)        # [N, 3, 3]
    scene_buffers['normals'] = torch.cat(scene_buffers['normals'], dim=0)            # [N, 3, 3]
    scene_buffers['uvs'] = torch.cat(scene_buffers['uvs'], dim=0)                    # [N, 3, 2]
    scene_buffers['material_ids'] = torch.cat(scene_buffers['material_ids'], dim=0)  # [N]

    # Every texture slot uses the same per-material filter (1) and wrap (0) codes.
    num_materials = len(scene_buffers["base_color_texture"])
    filter_modes = [1] * num_materials
    wrap_modes = [0] * num_materials

    # Voxelize
    out_tuple = _C.textured_mesh_to_volumetric_attr_cpu(
        voxel_size,
        grid_range,
        scene_buffers["triangles"],
        scene_buffers["normals"],
        scene_buffers["uvs"],
        scene_buffers["material_ids"],
        scene_buffers["base_color_factor"],
        scene_buffers["base_color_texture"],
        filter_modes,
        wrap_modes,
        scene_buffers["metallic_factor"],
        scene_buffers["metallic_texture"],
        filter_modes,
        wrap_modes,
        scene_buffers["roughness_factor"],
        scene_buffers["roughness_texture"],
        filter_modes,
        wrap_modes,
        scene_buffers["emissive_factor"],
        scene_buffers["emissive_texture"],
        filter_modes,
        wrap_modes,
        scene_buffers["alpha_mode"],
        scene_buffers["alpha_cutoff"],
        scene_buffers["alpha_factor"],
        scene_buffers["alpha_texture"],
        filter_modes,
        wrap_modes,
        scene_buffers["normal_texture"],
        filter_modes,
        wrap_modes,
        mip_level_offset,
        timing,
    )

    # Post process: quantize every attribute to uint8; normals are remapped from [-1, 1] to [0, 1] first.
    coord = out_tuple[0]
    attr = {
        "base_color": torch.clamp(out_tuple[1] * 255, 0, 255).byte().reshape(-1, 3),
        "metallic": torch.clamp(out_tuple[2] * 255, 0, 255).byte().reshape(-1, 1),
        "roughness": torch.clamp(out_tuple[3] * 255, 0, 255).byte().reshape(-1, 1),
        "emissive": torch.clamp(out_tuple[4] * 255, 0, 255).byte().reshape(-1, 3),
        "alpha": torch.clamp(out_tuple[5] * 255, 0, 255).byte().reshape(-1, 1),
        "normal": torch.clamp((out_tuple[6] * 0.5 + 0.5) * 255, 0, 255).byte().reshape(-1, 3),
    }

    return coord, attr
def blender_dump_to_volumetric_attr(
    dump: Dict[str, Any],
    voxel_size: Union[float, list, tuple, np.ndarray, torch.Tensor] = None,
    grid_size: Union[int, list, tuple, np.ndarray, torch.Tensor] = None,
    aabb: Union[list, tuple, np.ndarray, torch.Tensor] = None,
    mip_level_offset: float = 0.0,
    verbose: bool = False,
    timing: bool = False,
) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
    """
    Voxelize a dumped Blender scene into a sparse voxel grid with PBR properties.

    Args:
        dump (Dict[str, Any]): Dumped data from a blender scene. This function reads
            ``dump['materials']`` and ``dump['objects']``.
        voxel_size (float, list, tuple, np.ndarray, torch.Tensor): The size of each voxel.
        grid_size (int, list, tuple, np.ndarray, torch.Tensor): The size of the grid.
            NOTE: One of voxel_size and grid_size must be provided.
        aabb (list, tuple, np.ndarray, torch.Tensor): The axis-aligned bounding box of the mesh,
            as a 2x3 array of [min_xyz, max_xyz].
            If not provided, it is computed from the object vertices and padded.
        mip_level_offset (float): The mip level offset for texture mip level selection.
        verbose (bool): Whether to print the settings.
        timing (bool): Whether to print the timing information.

    Returns:
        torch.Tensor: The indices of the voxels that are occupied by the mesh.
        Dict[str, torch.Tensor]: A dictionary containing the following keys (all uint8):
            - "base_color": The base color of the occupied voxels.
            - "metallic": The metallic value of the occupied voxels.
            - "roughness": The roughness value of the occupied voxels.
            - "emissive": The emissive value of the occupied voxels.
            - "alpha": The alpha value of the occupied voxels.
            - "normal": The normal of the occupied voxels.
    """
    # Voxelize settings
    assert voxel_size is not None or grid_size is not None, "Either voxel_size or grid_size must be provided"

    if voxel_size is not None:
        if isinstance(voxel_size, (int, float)):
            voxel_size = [voxel_size, voxel_size, voxel_size]
        if isinstance(voxel_size, (list, tuple)):
            voxel_size = np.array(voxel_size)
        if isinstance(voxel_size, np.ndarray):
            voxel_size = torch.tensor(voxel_size, dtype=torch.float32)
        assert isinstance(voxel_size, torch.Tensor), f"voxel_size must be a float, list, tuple, np.ndarray, or torch.Tensor, but got {type(voxel_size)}"
        assert voxel_size.dim() == 1, f"voxel_size must be a 1D tensor, but got {voxel_size.shape}"
        assert voxel_size.size(0) == 3, f"voxel_size must have 3 elements, but got {voxel_size.size(0)}"

    if grid_size is not None:
        if isinstance(grid_size, int):
            grid_size = [grid_size, grid_size, grid_size]
        if isinstance(grid_size, (list, tuple)):
            grid_size = np.array(grid_size)
        if isinstance(grid_size, np.ndarray):
            grid_size = torch.tensor(grid_size, dtype=torch.int32)
        assert isinstance(grid_size, torch.Tensor), f"grid_size must be an int, list, tuple, np.ndarray, or torch.Tensor, but got {type(grid_size)}"
        assert grid_size.dim() == 1, f"grid_size must be a 1D tensor, but got {grid_size.shape}"
        assert grid_size.size(0) == 3, f"grid_size must have 3 elements, but got {grid_size.size(0)}"

    if aabb is not None:
        if isinstance(aabb, (list, tuple)):
            aabb = np.array(aabb)
        if isinstance(aabb, np.ndarray):
            aabb = torch.tensor(aabb, dtype=torch.float32)
        assert isinstance(aabb, torch.Tensor), f"aabb must be a list, tuple, np.ndarray, or torch.Tensor, but got {type(aabb)}"
        assert aabb.dim() == 2, f"aabb must be a 2D tensor, but got {aabb.shape}"
        assert aabb.size(0) == 2, f"aabb must have 2 rows, but got {aabb.size(0)}"
        assert aabb.size(1) == 3, f"aabb must have 3 columns, but got {aabb.size(1)}"

    # Auto adjust aabb
    if aabb is None:
        # The per-object extrema are reduced with NumPy and then converted to
        # tensors, because the padding math and torch.stack below need tensors.
        min_xyz = torch.tensor(
            np.min([obj['vertices'].min(axis=0) for obj in dump['objects']], axis=0),
            dtype=torch.float32,
        )
        max_xyz = torch.tensor(
            np.max([obj['vertices'].max(axis=0) for obj in dump['objects']], axis=0),
            dtype=torch.float32,
        )

        if voxel_size is not None:
            # Grow the box symmetrically to a whole number of voxels per axis.
            extent = max_xyz - min_xyz
            padding = torch.ceil(extent / voxel_size) * voxel_size - extent
            min_xyz = min_xyz - padding * 0.5
            max_xyz = max_xyz + padding * 0.5
        if grid_size is not None:
            padding = (max_xyz - min_xyz) / (grid_size - 1)
            min_xyz = min_xyz - padding * 0.5
            max_xyz = max_xyz + padding * 0.5

        aabb = torch.stack([min_xyz, max_xyz], dim=0).float()

    # Fill voxel size or grid size
    if voxel_size is None:
        voxel_size = (aabb[1] - aabb[0]) / grid_size
    if grid_size is None:
        grid_size = ((aabb[1] - aabb[0]) / voxel_size).round().int()

    grid_range = torch.stack([torch.zeros_like(grid_size), grid_size], dim=0).int()

    # Print settings
    if verbose:
        print(f"Voxelize settings:")
        print(f" Voxel size: {voxel_size}")
        print(f" Grid size: {grid_size}")
        print(f" AABB: {aabb}")

    # Load Scene
    scene_buffers = {
        'triangles': [],
        'normals': [],
        'uvs': [],
        'material_ids': [],
        'base_color_factor': [],
        'base_color_texture': [],
        'base_color_texture_filter': [],
        'base_color_texture_wrap': [],
        'metallic_factor': [],
        'metallic_texture': [],
        'metallic_texture_filter': [],
        'metallic_texture_wrap': [],
        'roughness_factor': [],
        'roughness_texture': [],
        'roughness_texture_filter': [],
        'roughness_texture_wrap': [],
        'alpha_mode': [],
        'alpha_cutoff': [],
        'alpha_factor': [],
        'alpha_texture': [],
        'alpha_texture_filter': [],
        'alpha_texture_wrap': [],
    }

    def load_texture(pack):
        """Decode one packed texture into (uint8 tensor, filter code, wrap code)."""
        png_bytes = pack['image']
        image = Image.open(io.BytesIO(png_bytes))
        # Non-square or non-power-of-two images are resampled to a square
        # power-of-two size derived from the longer edge.
        if image.width != image.height or not is_power_of_two(image.width):
            size = nearest_power_of_two(max(image.width, image.height))
            image = image.resize((size, size), Image.LANCZOS)
        texture = torch.tensor(np.array(image), dtype=torch.uint8)
        filter_mode = {
            'Linear': 1,
            'Closest': 0,
            'Cubic': 1,
            'Smart': 1,
        }[pack['interpolation']]
        wrap_mode = {
            'REPEAT': 0,
            'EXTEND': 1,
            'CLIP': 1,
            'MIRROR': 2,
        }[pack['extension']]
        return texture, filter_mode, wrap_mode

    def load_optional_texture(pack):
        """Like load_texture, but a missing texture yields an empty tensor and zeroed modes."""
        if pack is None:
            return torch.tensor([]), 0, 0
        return load_texture(pack)

    for material in dump['materials']:
        baseColorFactor = torch.tensor(material['baseColorFactor'][:3], dtype=torch.float32)
        baseColorTexture, baseColorTextureFilter, baseColorTextureWrap = \
            load_optional_texture(material['baseColorTexture'])
        if material['baseColorTexture'] is not None:
            assert baseColorTexture.shape[2] == 3, f"Base color texture must have 3 channels, but got {baseColorTexture.shape[2]}"
        scene_buffers['base_color_factor'].append(baseColorFactor)
        scene_buffers['base_color_texture'].append(baseColorTexture)
        scene_buffers['base_color_texture_filter'].append(baseColorTextureFilter)
        scene_buffers['base_color_texture_wrap'].append(baseColorTextureWrap)

        metallicTexture, metallicTextureFilter, metallicTextureWrap = \
            load_optional_texture(material['metallicTexture'])
        if material['metallicTexture'] is not None:
            assert metallicTexture.dim() == 2, f"Metallic texture must have 2 dimensions, but got {metallicTexture.dim()}"
        scene_buffers['metallic_factor'].append(material['metallicFactor'])
        scene_buffers['metallic_texture'].append(metallicTexture)
        scene_buffers['metallic_texture_filter'].append(metallicTextureFilter)
        scene_buffers['metallic_texture_wrap'].append(metallicTextureWrap)

        roughnessTexture, roughnessTextureFilter, roughnessTextureWrap = \
            load_optional_texture(material['roughnessTexture'])
        if material['roughnessTexture'] is not None:
            assert roughnessTexture.dim() == 2, f"Roughness texture must have 2 dimensions, but got {roughnessTexture.dim()}"
        scene_buffers['roughness_factor'].append(material['roughnessFactor'])
        scene_buffers['roughness_texture'].append(roughnessTexture)
        scene_buffers['roughness_texture_filter'].append(roughnessTextureFilter)
        scene_buffers['roughness_texture_wrap'].append(roughnessTextureWrap)

        alphaTexture, alphaTextureFilter, alphaTextureWrap = \
            load_optional_texture(material['alphaTexture'])
        if material['alphaTexture'] is not None:
            assert alphaTexture.dim() == 2, f"Alpha texture must have 2 dimensions, but got {alphaTexture.dim()}"
        scene_buffers['alpha_mode'].append(ALPHA_MODE_ENUM[material['alphaMode']])
        scene_buffers['alpha_cutoff'].append(material['alphaCutoff'])
        scene_buffers['alpha_factor'].append(material['alphaFactor'])
        scene_buffers['alpha_texture'].append(alphaTexture)
        scene_buffers['alpha_texture_filter'].append(alphaTextureFilter)
        scene_buffers['alpha_texture_wrap'].append(alphaTextureWrap)

    for obj in dump['objects']:
        # Triangles are expressed relative to the AABB minimum corner.
        triangles = torch.tensor(obj['vertices'][obj['faces']], dtype=torch.float32).reshape(-1, 3, 3) - aabb[0].reshape(1, 1, 3)
        normals = torch.tensor(obj['normals'], dtype=torch.float32)
        uvs = torch.tensor(obj['uvs'], dtype=torch.float32) if obj['uvs'] is not None else torch.zeros(triangles.shape[0], 3, 2, dtype=torch.float32)
        material_id = torch.tensor(obj['mat_ids'], dtype=torch.int32)
        scene_buffers['triangles'].append(triangles)
        scene_buffers['normals'].append(normals)
        scene_buffers['uvs'].append(uvs)
        scene_buffers['material_ids'].append(material_id)

    scene_buffers['triangles'] = torch.cat(scene_buffers['triangles'], dim=0)        # [N, 3, 3]
    scene_buffers['normals'] = torch.cat(scene_buffers['normals'], dim=0)            # [N, 3, 3]
    scene_buffers['uvs'] = torch.cat(scene_buffers['uvs'], dim=0)                    # [N, 3, 2]
    scene_buffers['material_ids'] = torch.cat(scene_buffers['material_ids'], dim=0)  # [N]

    scene_buffers['uvs'][:, :, 1] = 1 - scene_buffers['uvs'][:, :, 1]  # Flip v coordinate

    # The dump carries no emissive or normal-map data, so those slots are filled
    # with zero factors, empty textures and zeroed modes, one entry per material.
    num_materials = len(scene_buffers["base_color_texture"])

    # Voxelize
    out_tuple = _C.textured_mesh_to_volumetric_attr_cpu(
        voxel_size,
        grid_range,
        scene_buffers["triangles"],
        scene_buffers["normals"],
        scene_buffers["uvs"],
        scene_buffers["material_ids"],
        scene_buffers["base_color_factor"],
        scene_buffers["base_color_texture"],
        scene_buffers["base_color_texture_filter"],
        scene_buffers["base_color_texture_wrap"],
        scene_buffers["metallic_factor"],
        scene_buffers["metallic_texture"],
        scene_buffers["metallic_texture_filter"],
        scene_buffers["metallic_texture_wrap"],
        scene_buffers["roughness_factor"],
        scene_buffers["roughness_texture"],
        scene_buffers["roughness_texture_filter"],
        scene_buffers["roughness_texture_wrap"],
        [torch.zeros(3, dtype=torch.float32) for _ in range(num_materials)],
        [torch.tensor([]) for _ in range(num_materials)],
        [0] * num_materials,
        [0] * num_materials,
        scene_buffers["alpha_mode"],
        scene_buffers["alpha_cutoff"],
        scene_buffers["alpha_factor"],
        scene_buffers["alpha_texture"],
        scene_buffers["alpha_texture_filter"],
        scene_buffers["alpha_texture_wrap"],
        [torch.tensor([]) for _ in range(num_materials)],
        [0] * num_materials,
        [0] * num_materials,
        mip_level_offset,
        timing,
    )

    # Post process: quantize every attribute to uint8; normals are remapped from [-1, 1] to [0, 1] first.
    coord = out_tuple[0]
    attr = {
        "base_color": torch.clamp(out_tuple[1] * 255, 0, 255).byte().reshape(-1, 3),
        "metallic": torch.clamp(out_tuple[2] * 255, 0, 255).byte().reshape(-1, 1),
        "roughness": torch.clamp(out_tuple[3] * 255, 0, 255).byte().reshape(-1, 1),
        "emissive": torch.clamp(out_tuple[4] * 255, 0, 255).byte().reshape(-1, 3),
        "alpha": torch.clamp(out_tuple[5] * 255, 0, 255).byte().reshape(-1, 1),
        "normal": torch.clamp((out_tuple[6] * 0.5 + 0.5) * 255, 0, 255).byte().reshape(-1, 3),
    }

    return coord, attr


def read(file_path: str) -> Union[torch.Tensor, Dict[str, torch.Tensor]]:
    """
    Read a file containing voxels, choosing the reader from the file extension.

    Args:
        file_path: Path to the file. Must end with '.npz', '.ply' or '.vxz'.

    Returns:
        torch.Tensor: the coordinates of the voxels.
        Dict[str, torch.Tensor]: the attributes of the voxels.

    Raises:
        ValueError: If the extension is not one of the supported ones.
    """
    if file_path.endswith('.npz'):
        return read_npz(file_path)
    elif file_path.endswith('.ply'):
        return read_ply(file_path)
    elif file_path.endswith('.vxz'):
        return read_vxz(file_path)
    else:
        raise ValueError(f"Unsupported file type {file_path}")


def write(file_path: str, coord: torch.Tensor, attr: Dict[str, torch.Tensor], **kwargs):
    """
    Write a file containing voxels, choosing the writer from the file extension.

    Args:
        file_path: Path to the file. Must end with '.npz', '.ply' or '.vxz'.
        coord: the coordinates of the voxels.
        attr: the attributes of the voxels.
        **kwargs: forwarded unchanged to the format-specific writer.

    Raises:
        ValueError: If the extension is not one of the supported ones.
    """
    if file_path.endswith('.npz'):
        write_npz(file_path, coord, attr, **kwargs)
    elif file_path.endswith('.ply'):
        write_ply(file_path, coord, attr, **kwargs)
    elif file_path.endswith('.vxz'):
        write_vxz(file_path, coord, attr, **kwargs)
    else:
        raise ValueError(f"Unsupported file type {file_path}")
def read_npz(file) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
    """
    Read a NPZ file containing voxels.

    Args:
        file: Path or file object from which to read the NPZ file.

    Returns:
        torch.Tensor: the coordinates of the voxels (int32).
        Dict[str, torch.Tensor]: the attributes of the voxels, i.e. every
            array in the archive other than 'coord'.
    """
    # The context manager closes the archive once all arrays are materialized.
    with np.load(file) as data:
        # Widen in NumPy before handing over to torch: coordinates are stored as
        # uint16, which torch.from_numpy rejects on builds without torch.uint16.
        coord = torch.from_numpy(data['coord'].astype(np.int32))
        attr = {k: torch.from_numpy(data[k]) for k in data.files if k != 'coord'}
    return coord, attr


def write_npz(file, coord: torch.Tensor, attr: Dict[str, torch.Tensor], compress=True):
    """
    Write a NPZ file containing voxels.

    Args:
        file: Path or file object to which to write the NPZ file.
        coord: the coordinates of the voxels. Stored as uint16.
        attr: the attributes of the voxels. Each is stored under its own key.
        compress: whether to write a compressed archive.

    Raises:
        ValueError: If a coordinate does not fit in uint16.
    """
    coord_np = coord.cpu().numpy()
    limit = np.iinfo(np.uint16).max
    # Refuse values the uint16 cast would silently wrap around.
    if coord_np.size and (coord_np.min() < 0 or coord_np.max() > limit):
        raise ValueError(f"coord values must be in [0, {limit}] to be stored as uint16")
    data = {'coord': coord_np.astype(np.uint16)}
    data.update({k: v.cpu().numpy() for k, v in attr.items()})
    if compress:
        np.savez_compressed(file, **data)
    else:
        np.savez(file, **data)
# NumPy dtype codes for the tensor dtypes that can be stored as PLY properties.
# Entries are looked up by name so that importing this module does not fail on
# PyTorch builds that lack the unsigned 16/32-bit dtypes.
DTYPE_MAP = {
    getattr(torch, dtype_name): code
    for dtype_name, code in (
        ('uint8', 'u1'),
        ('uint16', 'u2'),
        ('uint32', 'u4'),
        ('int8', 'i1'),
        ('int16', 'i2'),
        ('int32', 'i4'),
        ('float32', 'f4'),
        ('float64', 'f8'),
    )
    if hasattr(torch, dtype_name)
}


def read_ply(file) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
    """
    Read a PLY file containing voxels.

    Properties other than x/y/z are expected to be named '<attribute>_<channel>'
    and are regrouped into one (N, channels) tensor per attribute.

    Args:
        file: Path or file-like object of the PLY file.

    Returns:
        torch.Tensor: the coordinates of the voxels (x/y/z rounded to integers).
        Dict[str, torch.Tensor]: the attributes of the voxels.

    Raises:
        ValueError: If a property name does not follow '<attribute>_<channel>'.
    """
    vertex = plyfile.PlyData.read(file).elements[0]
    xyz = np.stack([vertex[k] for k in ['x', 'y', 'z']], axis=1)
    coord = torch.from_numpy(np.round(xyz).astype(int))

    # Count channels per attribute by splitting on the LAST underscore, so that
    # an attribute whose name is a prefix of another (e.g. 'base' and
    # 'base_color') is not credited with the other's channels.
    channel_count: Dict[str, int] = {}
    for key in vertex.data.dtype.names:
        if key in ('x', 'y', 'z'):
            continue
        name, _, channel = key.rpartition('_')
        if not name or not channel.isdigit():
            raise ValueError(f"Unexpected PLY property name {key!r}, expected '<attribute>_<channel>'")
        channel_count[name] = channel_count.get(name, 0) + 1

    attr = {
        name: torch.from_numpy(np.stack([vertex[f'{name}_{j}'] for j in range(count)], axis=1))
        for name, count in channel_count.items()
    }

    return coord, attr


def write_ply(file, coord: torch.Tensor, attr: Dict[str, torch.Tensor]):
    """
    Write a PLY file containing voxels.

    Coordinates are written as float32 x/y/z properties; every attribute channel
    becomes a property named '<attribute>_<channel>' with the attribute's dtype.

    Args:
        file: Path or file-like object of the PLY file.
        coord: the coordinates of the voxels, shape (N, 3).
        attr: the attributes of the voxels, each of shape (N, C) or (N,).
    """
    dtypes = [('x', 'f4'), ('y', 'f4'), ('z', 'f4')]
    columns = []
    for k, v in attr.items():
        assert v.dtype in DTYPE_MAP, f"Unsupported data type {v.dtype} for attribute {k}"
        values = v.cpu().numpy()
        if values.ndim == 1:
            # Treat a flat attribute as a single channel.
            values = values[:, None]
        for j in range(values.shape[-1]):
            dtypes.append((f'{k}_{j}', DTYPE_MAP[v.dtype]))
            columns.append((f'{k}_{j}', values[:, j]))

    # Fill the structured array field by field so each column keeps its own
    # dtype instead of passing through one promoted array.
    data = np.empty(len(coord), dtype=dtypes)
    coord_np = coord.cpu().numpy().astype(np.float32)
    for axis, axis_name in enumerate(('x', 'y', 'z')):
        data[axis_name] = coord_np[:, axis]
    for field, column in columns:
        data[field] = column
    plyfile.PlyData([plyfile.PlyElement.describe(data, 'vertex')]).write(file)
+ attr: the attributes of the voxels. + """ + dtypes = [('x', 'f4'), ('y', 'f4'), ('z', 'f4')] + for k, v in attr.items(): + for j in range(v.shape[-1]): + assert v.dtype in DTYPE_MAP, f"Unsupported data type {v.dtype} for attribute {k}" + dtypes.append((f'{k}_{j}', DTYPE_MAP[v.dtype])) + data = np.empty(len(coord), dtype=dtypes) + all_chs = np.concatenate([coord.cpu().numpy().astype(np.float32)] + [v.cpu().numpy() for v in attr.values()], axis=1) + data[:] = list(map(tuple, all_chs)) + plyfile.PlyData([plyfile.PlyElement.describe(data, 'vertex')]).write(file) + \ No newline at end of file diff --git a/o-voxel/o_voxel/io/vxz.py b/o-voxel/o_voxel/io/vxz.py new file mode 100644 index 0000000000000000000000000000000000000000..91fba74d26d2edaabbcd7edb707d272cbca25b8a --- /dev/null +++ b/o-voxel/o_voxel/io/vxz.py @@ -0,0 +1,365 @@ +from typing import * +import os +import json +import struct +import torch +import numpy as np +import zlib +import lzma +import zstandard +from concurrent.futures import ThreadPoolExecutor +from ..serialize import encode_seq, decode_seq +from .. import _C + + +__all__ = [ + "read_vxz", + "read_vxz_info", + "write_vxz", +] + + +""" +VXZ format + +Header: +- file type (3 bytes) - 'VXZ' +- version (1 byte) - 0 +- binary start offset (4 bytes) +- structure (json) - +{ + "num_voxel": int, + "chunk_size": int, + "filter": str, + "compression": str, + "compression_level": int, + "raw_size": int, + "compressed_size": int, + "compress_ratio": float, + "attr_interleave": str, + "attr": [ + {"name": str, "chs": int}, + ... + ] + "chunks": [ + { + "ptr": [offset, length], # offset from global binary start + "svo": [offset, length], # offset from this chunk start + "attr": [offset, length], # offset from this chunk start + }, + ... 
DEFAULT_COMPRESION_LEVEL = {
    'none': 0,
    'deflate': 9,
    'lzma': 9,
    'zstd': 22,
}

# Keys of a per-chunk header entry that hold bookkeeping rather than an attribute stream.
_RESERVED_CHUNK_KEYS = ('idx', 'ptr', 'svo')


def _compress(data: bytes, algo: Literal['none', 'deflate', 'lzma', 'zstd'], level: Optional[int]) -> bytes:
    """
    Compress a byte string with the selected algorithm.

    Args:
        data: the raw bytes to compress.
        algo: the compression algorithm to use.
        level: the compression level / preset. None selects DEFAULT_COMPRESION_LEVEL[algo].

    Returns:
        bytes: `data` unchanged for 'none', a raw (headerless) deflate or LZMA2 stream,
        or a zstd frame.

    Raises:
        ValueError: if `algo` is not one of the supported algorithms.
    """
    if algo == 'none':
        return data
    if algo not in DEFAULT_COMPRESION_LEVEL:
        raise ValueError(f"Invalid compression algorithm: {algo}")
    if level is None:
        level = DEFAULT_COMPRESION_LEVEL[algo]
    if algo == 'deflate':
        # wbits=-15 produces a raw deflate stream without zlib header or checksum
        compresser = zlib.compressobj(level, wbits=-15)
        return compresser.compress(data) + compresser.flush()
    if algo == 'lzma':
        compresser = lzma.LZMACompressor(format=lzma.FORMAT_RAW, filters=[{'id': lzma.FILTER_LZMA2, 'preset': level}])
        return compresser.compress(data) + compresser.flush()
    # algo == 'zstd'
    compresser = zstandard.ZstdCompressor(level=level, write_checksum=False, write_content_size=True, threads=-1)
    return compresser.compress(data)


def _decompress(data: bytes, algo: Literal['none', 'deflate', 'lzma', 'zstd'], level: Optional[int]) -> bytes:
    """
    Invert `_compress`.

    Args:
        data: the compressed bytes.
        algo: the compression algorithm that produced `data`.
        level: the level / preset used when compressing. It is only consumed by the
            'lzma' branch, where the raw LZMA2 filter chain is rebuilt from the preset.
            None selects DEFAULT_COMPRESION_LEVEL[algo].

    Returns:
        bytes: the decompressed bytes.

    Raises:
        ValueError: if `algo` is not one of the supported algorithms.
    """
    if algo == 'none':
        return data
    if algo not in DEFAULT_COMPRESION_LEVEL:
        raise ValueError(f"Invalid compression algorithm: {algo}")
    if level is None:
        level = DEFAULT_COMPRESION_LEVEL[algo]
    if algo == 'deflate':
        decompresser = zlib.decompressobj(wbits=-15)
        return decompresser.decompress(data) + decompresser.flush()
    if algo == 'lzma':
        decompresser = lzma.LZMADecompressor(format=lzma.FORMAT_RAW, filters=[{'id': lzma.FILTER_LZMA2, 'preset': level}])
        return decompresser.decompress(data)
    # algo == 'zstd'
    decompresser = zstandard.ZstdDecompressor(format=zstandard.FORMAT_ZSTD1)
    return decompresser.decompress(data)


def _read_header(stream) -> Tuple[Dict, int]:
    """
    Read and validate the VXZ header from a binary stream.

    Only the header is consumed: on return the stream is positioned at the first
    byte of the binary section. The stream is expected to return the requested
    number of bytes from `read(n)` (regular files and `io.BytesIO` do).

    Returns:
        Tuple[Dict, int]: the decoded JSON structure and the binary start offset.
    """
    prefix = stream.read(8)
    assert len(prefix) == 8 and prefix[:3] == b'VXZ', "Invalid file type"
    version = prefix[3]
    assert version == 0, "Invalid file version"

    # 4-byte big-endian offset of the binary section, counted from the file start
    bin_start = struct.unpack('>I', prefix[4:8])[0]
    structure_data = json.loads(stream.read(bin_start - 8).decode())
    return structure_data, bin_start


def read_vxz_info(file) -> Dict:
    """
    Read the header of a VXZ file without reading or decompressing the binary data.

    Args:
        file: Path or file-like object to the VXZ file. A file-like object is left
            positioned at the start of the binary section.

    Returns:
        Dict: the header of the VXZ file.
    """
    if isinstance(file, (str, os.PathLike)):
        with open(file, 'rb') as f:
            return _read_header(f)[0]
    return _read_header(file)[0]


def read_vxz(file, num_threads: int = -1) -> Tuple[torch.Tensor, Dict[str, torch.Tensor]]:
    """
    Read a VXZ file containing voxels.

    Args:
        file: Path or file-like object to the VXZ file.
        num_threads: the number of threads to use for reading the file.
            Non-positive values use os.cpu_count().

    Returns:
        torch.Tensor: the coordinates of the voxels.
        Dict[str, torch.Tensor]: the attributes of the voxels.

    Raises:
        ValueError: if the header names an unknown attr_interleave or filter mode.
    """
    if isinstance(file, (str, os.PathLike)):
        with open(file, 'rb') as f:
            structure_data, _ = _read_header(f)
            bin_data = f.read()
    else:
        structure_data, _ = _read_header(file)
        bin_data = file.read()

    num_threads = num_threads if num_threads > 0 else os.cpu_count()

    chunk_size = structure_data['chunk_size']
    chunk_depth = np.log2(chunk_size)
    assert chunk_depth.is_integer(), f"Chunk size must be a power of 2, got {chunk_size}"
    chunk_depth = int(chunk_depth)

    compression = structure_data['compression']
    compression_level = structure_data['compression_level']
    attr_interleave = structure_data['attr_interleave']
    attr_filter = structure_data['filter']
    attr_layout = structure_data['attr']

    if not structure_data['chunks']:
        # A file without chunks holds no voxels; torch.cat cannot combine an empty list.
        # Attributes are uint8 (enforced by write_vxz). int32 coordinates match the
        # chunk offsets added below; NOTE(review): confirm against decode_seq's dtype.
        empty_coord = torch.zeros((0, 3), dtype=torch.int32)
        empty_attr = {k: torch.zeros((0, chs), dtype=torch.uint8) for k, chs in attr_layout}
        return empty_coord, empty_attr

    def worker(chunk_info):
        # Slice this chunk out of the global binary section, then decompress each stream in it
        ptr_offset, ptr_length = chunk_info['ptr']
        chunk_data = bin_data[ptr_offset:ptr_offset + ptr_length]
        decompressed = {}
        for k, v in chunk_info.items():
            if k in ('ptr', 'idx'):
                continue
            decompressed[k] = np.frombuffer(_decompress(chunk_data[v[0]:v[0] + v[1]], compression, compression_level), dtype=np.uint8)
        svo = torch.tensor(decompressed['svo'])
        morton_code = _C.decode_sparse_voxel_octree_cpu(svo, chunk_depth)
        coord = decode_seq(morton_code.int()).cpu()

        # deinterleave attributes
        if attr_interleave == 'none':
            columns = []
            for k, chs in attr_layout:
                for i in range(chs):
                    columns.append(torch.tensor(decompressed[f'{k}_{i}']))
            all_attr = torch.stack(columns, dim=1)
        elif attr_interleave == 'as_is':
            groups = []
            for k, chs in attr_layout:
                groups.append(torch.tensor(decompressed[k].reshape(-1, chs)))
            all_attr = torch.cat(groups, dim=1)
        elif attr_interleave == 'all':
            all_chs = sum(chs for _, chs in attr_layout)
            # Convert to a tensor like the other branches; a NumPy array here would
            # break the torch.cat that combines the chunks.
            all_attr = torch.tensor(decompressed['attr'].reshape(-1, all_chs))
        else:
            raise ValueError(f"Invalid attr_interleave value: {attr_interleave}")

        # unfilter
        if attr_filter == 'none':
            pass
        elif attr_filter == 'parent':
            all_attr = _C.decode_sparse_voxel_octree_attr_parent_cpu(svo, chunk_depth, all_attr)
        elif attr_filter == 'neighbor':
            all_attr = _C.decode_sparse_voxel_octree_attr_neighbor_cpu(coord, chunk_size, all_attr)
        else:
            raise ValueError(f"Invalid filter value: {attr_filter}")

        # split the channel matrix back into named attributes
        attr = {}
        ch = 0
        for k, chs in attr_layout:
            attr[k] = all_attr[:, ch:ch + chs]
            ch += chs
        return {
            'coord': coord,
            'attr': attr,
        }

    if num_threads == 1:
        chunks = [worker(info) for info in structure_data['chunks']]
    else:
        with ThreadPoolExecutor(max_workers=num_threads) as executor:
            chunks = list(executor.map(worker, structure_data['chunks']))

    # Combine chunks: chunk-local coordinates are shifted by the chunk index times chunk_size
    coord = []
    attr = {k: [] for k, _ in attr_layout}
    for info, chunk in zip(structure_data['chunks'], chunks):
        coord.append(chunk['coord'] + torch.tensor([[info['idx'][0] * chunk_size, info['idx'][1] * chunk_size, info['idx'][2] * chunk_size]]).int())
        for k, v in chunk['attr'].items():
            attr[k].append(v)
    coord = torch.cat(coord, dim=0)
    for k, v in attr.items():
        attr[k] = torch.cat(v, dim=0)
    return coord, attr


def write_vxz(
    file,
    coord: torch.Tensor,
    attr: Dict[str, torch.Tensor],
    chunk_size: int = 256,
    filter: Literal['none', 'parent', 'neighbor'] = 'none',
    compression: Literal['none', 'deflate', 'lzma', 'zstd'] = 'lzma',
    compression_level: Optional[int] = None,
    attr_interleave: Literal['none', 'as_is', 'all'] = 'as_is',
    num_threads: int = -1,
):
    """
    Write a VXZ file containing voxels.

    Args:
        file: Path or file-like object to the VXZ file.
        coord: the coordinates of the voxels.
        attr: the attributes of the voxels (uint8, one row per voxel).
        chunk_size: the size of each chunk. Must be a power of 2.
        filter: the filter to apply to the voxels.
        compression: the compression algorithm to use.
        compression_level: the level of compression. None selects the default for
            `compression`; an explicit 0 is passed through to the compressor.
        attr_interleave: how to interleave the attributes.
        num_threads: the number of threads to use for compression.
            Non-positive values use os.cpu_count().
    """
    # Check
    assert attr_interleave in ['none', 'as_is', 'all'], f"Invalid attr_interleave value: {attr_interleave}"
    assert filter in ['none', 'parent', 'neighbor'], f"Invalid filter value: {filter}"
    assert compression in DEFAULT_COMPRESION_LEVEL, f"Invalid compression algorithm: {compression}"
    for k, v in attr.items():
        assert coord.shape[0] == v.shape[0], f"Number of coordinates and attributes do not match for key {k}"
        assert v.dtype == torch.uint8, f"Attributes must be uint8, got {v.dtype} for key {k}"
        # In 'as_is' mode the attribute name is used directly as a key of the chunk
        # entry, where it would overwrite the bookkeeping fields.
        assert attr_interleave != 'as_is' or k not in _RESERVED_CHUNK_KEYS, f"Attribute name {k} is reserved"

    # `is None` rather than `or`: an explicit level 0 must not be replaced by the default
    if compression_level is None:
        compression_level = DEFAULT_COMPRESION_LEVEL[compression]
    num_threads = num_threads if num_threads > 0 else os.cpu_count()

    file_info = {
        'num_voxel': coord.shape[0],
        'chunk_size': chunk_size,
        'filter': filter,
        'compression': compression,
        'compression_level': compression_level,
        'raw_size': sum([coord.numel() * 4] + [v.numel() for v in attr.values()]),
        'compressed_size': 0,
        'compress_ratio': 0.0,
        'attr_interleave': attr_interleave,
        'attr': [[k, v.shape[1]] for k, v in attr.items()],
        'chunks': [],
    }
    attr_layout = file_info['attr']

    # Split into chunks
    chunk_depth = np.log2(chunk_size)
    assert chunk_depth.is_integer(), f"Chunk size must be a power of 2, got {chunk_size}"
    chunk_depth = int(chunk_depth)

    chunks = []
    if coord.shape[0] > 0:
        chunk_coord = coord // chunk_size
        local_coord = coord % chunk_size
        unique_chunk_coord, inverse = torch.unique(chunk_coord, dim=0, return_inverse=True)
        for idx, chunk_xyz in enumerate(unique_chunk_coord.tolist()):
            chunk_mask = (inverse == idx)
            chunks.append({
                'idx': chunk_xyz,
                'coord': local_coord[chunk_mask],
                'attr': {k: v[chunk_mask] for k, v in attr.items()},
            })

    def worker(chunk):
        ## compress to binary
        # Order the voxels by Morton code before building the octree
        voxel_coord = chunk['coord']
        morton_code = encode_seq(voxel_coord)
        sorted_idx = morton_code.argsort().cpu()
        voxel_coord = voxel_coord.cpu()[sorted_idx]
        morton_code = morton_code.cpu()[sorted_idx]
        voxel_attr = torch.cat([v.cpu()[sorted_idx] for v in chunk['attr'].values()], dim=1)
        svo = _C.encode_sparse_voxel_octree_cpu(morton_code, chunk_depth)
        svo_bytes = _compress(svo.numpy().tobytes(), compression, compression_level)

        # filter
        if filter == 'none':
            voxel_attr = voxel_attr.numpy()
        elif filter == 'parent':
            voxel_attr = _C.encode_sparse_voxel_octree_attr_parent_cpu(svo, chunk_depth, voxel_attr).numpy()
        elif filter == 'neighbor':
            voxel_attr = _C.encode_sparse_voxel_octree_attr_neighbor_cpu(voxel_coord, chunk_size, voxel_attr).numpy()

        # interleave attributes
        attr_bytes = {}
        if attr_interleave == 'none':
            # one stream per channel
            ch = 0
            for k, chs in attr_layout:
                for i in range(chs):
                    attr_bytes[f'{k}_{i}'] = _compress(voxel_attr[:, ch].tobytes(), compression, compression_level)
                    ch += 1
        elif attr_interleave == 'as_is':
            # one stream per attribute
            ch = 0
            for k, chs in attr_layout:
                attr_bytes[k] = _compress(voxel_attr[:, ch:ch + chs].tobytes(), compression, compression_level)
                ch += chs
        elif attr_interleave == 'all':
            # a single stream holding every channel
            attr_bytes['attr'] = _compress(voxel_attr.tobytes(), compression, compression_level)

        ## buffer for each chunk; offsets are relative to the chunk start
        chunk_info = {'idx': chunk['idx']}
        parts = [svo_bytes]
        chunk_info['svo'] = [0, len(svo_bytes)]
        offset = len(svo_bytes)
        for k, v in attr_bytes.items():
            chunk_info[k] = [offset, len(v)]
            parts.append(v)
            offset += len(v)

        return chunk_info, b''.join(parts)

    # Compress each chunk
    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        results = list(executor.map(worker, chunks))

    # Lay the chunks out back to back; 'ptr' offsets are relative to the binary start
    bin_parts = []
    bin_size = 0
    for chunk_info, chunk_data in results:
        chunk_info['ptr'] = [bin_size, len(chunk_data)]
        bin_parts.append(chunk_data)
        bin_size += len(chunk_data)
        file_info['chunks'].append(chunk_info)
    bin_data = b''.join(bin_parts)

    file_info['compressed_size'] = bin_size
    # Nothing is written for an empty voxel set; keep the ratio at 0.0 instead of dividing by zero
    file_info['compress_ratio'] = file_info['raw_size'] / bin_size if bin_size > 0 else 0.0

    # File parts
    structure_data = json.dumps(file_info).encode()
    header = b'VXZ\x00' + struct.pack('>I', len(structure_data) + 8)

    # Write to file
    if isinstance(file, (str, os.PathLike)):
        with open(file, 'wb') as f:
            f.write(header)
            f.write(structure_data)
            f.write(bin_data)
    else:
        file.write(header)
        file.write(structure_data)
        file.write(bin_data)
+ + Args: + vertices: (N, 3) tensor of vertex positions + faces: (M, 3) tensor of vertex indices + attr_volume: (L, C) features of a sprase tensor for attribute interpolation + coords: (L, 3) tensor of coordinates for each voxel + attr_layout: dictionary of slice objects for each attribute + aabb: (2, 3) tensor of minimum and maximum coordinates of the volume + voxel_size: (3,) tensor of size of each voxel + grid_size: (3,) tensor of number of voxels in each dimension + decimation_target: target number of vertices for mesh simplification + texture_size: size of the texture for baking + remesh: whether to perform remeshing + remesh_band: size of the remeshing band + remesh_project: projection factor for remeshing + mesh_cluster_threshold_cone_half_angle_rad: threshold for cone-based clustering in uv unwrapping + mesh_cluster_refine_iterations: number of iterations for refining clusters in uv unwrapping + mesh_cluster_global_iterations: number of global iterations for clustering in uv unwrapping + mesh_cluster_smooth_strength: strength of smoothing for clustering in uv unwrapping + verbose: whether to print verbose messages + use_tqdm: whether to use tqdm to display progress bar + """ + # --- Input Normalization (AABB, Voxel Size, Grid Size) --- + if isinstance(aabb, (list, tuple)): + aabb = np.array(aabb) + if isinstance(aabb, np.ndarray): + aabb = torch.tensor(aabb, dtype=torch.float32, device=coords.device) + assert isinstance(aabb, torch.Tensor), f"aabb must be a list, tuple, np.ndarray, or torch.Tensor, but got {type(aabb)}" + assert aabb.dim() == 2, f"aabb must be a 2D tensor, but got {aabb.shape}" + assert aabb.size(0) == 2, f"aabb must have 2 rows, but got {aabb.size(0)}" + assert aabb.size(1) == 3, f"aabb must have 3 columns, but got {aabb.size(1)}" + + # Calculate grid dimensions based on AABB and voxel size + if voxel_size is not None: + if isinstance(voxel_size, float): + voxel_size = [voxel_size, voxel_size, voxel_size] + if isinstance(voxel_size, (list, 
tuple)): + voxel_size = np.array(voxel_size) + if isinstance(voxel_size, np.ndarray): + voxel_size = torch.tensor(voxel_size, dtype=torch.float32, device=coords.device) + grid_size = ((aabb[1] - aabb[0]) / voxel_size).round().int() + else: + assert grid_size is not None, "Either voxel_size or grid_size must be provided" + if isinstance(grid_size, int): + grid_size = [grid_size, grid_size, grid_size] + if isinstance(grid_size, (list, tuple)): + grid_size = np.array(grid_size) + if isinstance(grid_size, np.ndarray): + grid_size = torch.tensor(grid_size, dtype=torch.int32, device=coords.device) + voxel_size = (aabb[1] - aabb[0]) / grid_size + + # Assertions for dimensions + assert isinstance(voxel_size, torch.Tensor) + assert voxel_size.dim() == 1 and voxel_size.size(0) == 3 + assert isinstance(grid_size, torch.Tensor) + assert grid_size.dim() == 1 and grid_size.size(0) == 3 + + if use_tqdm: + pbar = tqdm(total=6, desc="Extracting GLB") + if verbose: + print(f"Original mesh: {vertices.shape[0]} vertices, {faces.shape[0]} faces") + + # Move data to GPU + vertices = vertices.cuda() + faces = faces.cuda() + + # Initialize CUDA mesh handler + mesh = cumesh.CuMesh() + mesh.init(vertices, faces) + + # --- Initial Mesh Cleaning --- + # Fills holes as much as we can before processing + mesh.fill_holes(max_hole_perimeter=3e-2) + if verbose: + print(f"After filling holes: {mesh.num_vertices} vertices, {mesh.num_faces} faces") + vertices, faces = mesh.read() + if use_tqdm: + pbar.update(1) + + # Build BVH for the current mesh to guide remeshing + if use_tqdm: + pbar.set_description("Building BVH") + if verbose: + print(f"Building BVH for current mesh...", end='', flush=True) + bvh = cumesh.cuBVH(vertices, faces) + if use_tqdm: + pbar.update(1) + if verbose: + print("Done") + + if use_tqdm: + pbar.set_description("Cleaning mesh") + if verbose: + print("Cleaning mesh...") + + # --- Branch 1: Standard Pipeline (Simplification & Cleaning) --- + if not remesh: + # Step 1: Aggressive 
simplification (3x target) + mesh.simplify(decimation_target * 3, verbose=verbose) + if verbose: + print(f"After inital simplification: {mesh.num_vertices} vertices, {mesh.num_faces} faces") + + # Step 2: Clean up topology (duplicates, non-manifolds, isolated parts) + mesh.remove_duplicate_faces() + mesh.repair_non_manifold_edges() + mesh.remove_small_connected_components(1e-5) + mesh.fill_holes(max_hole_perimeter=3e-2) + if verbose: + print(f"After initial cleanup: {mesh.num_vertices} vertices, {mesh.num_faces} faces") + + # Step 3: Final simplification to target count + mesh.simplify(decimation_target, verbose=verbose) + if verbose: + print(f"After final simplification: {mesh.num_vertices} vertices, {mesh.num_faces} faces") + + # Step 4: Final Cleanup loop + mesh.remove_duplicate_faces() + mesh.repair_non_manifold_edges() + mesh.remove_small_connected_components(1e-5) + mesh.fill_holes(max_hole_perimeter=3e-2) + if verbose: + print(f"After final cleanup: {mesh.num_vertices} vertices, {mesh.num_faces} faces") + + # Step 5: Unify face orientations + mesh.unify_face_orientations() + + # --- Branch 2: Remeshing Pipeline --- + else: + center = aabb.mean(dim=0) + scale = (aabb[1] - aabb[0]).max().item() + resolution = grid_size.max().item() + + # Perform Dual Contouring remeshing (rebuilds topology) + mesh.init(*cumesh.remeshing.remesh_narrow_band_dc( + vertices, faces, + center = center, + scale = (resolution + 3 * remesh_band) / resolution * scale, + resolution = resolution, + band = remesh_band, + project_back = remesh_project, # Snaps vertices back to original surface + verbose = verbose, + bvh = bvh, + )) + if verbose: + print(f"After remeshing: {mesh.num_vertices} vertices, {mesh.num_faces} faces") + + # Simplify and clean the remeshed result (similar logic to above) + mesh.simplify(decimation_target, verbose=verbose) + if verbose: + print(f"After simplifying: {mesh.num_vertices} vertices, {mesh.num_faces} faces") + + if use_tqdm: + pbar.update(1) + if verbose: + 
print("Done") + + + # --- UV Parameterization --- + if use_tqdm: + pbar.set_description("Parameterizing new mesh") + if verbose: + print("Parameterizing new mesh...") + + out_vertices, out_faces, out_uvs, out_vmaps = mesh.uv_unwrap( + compute_charts_kwargs={ + "threshold_cone_half_angle_rad": mesh_cluster_threshold_cone_half_angle_rad, + "refine_iterations": mesh_cluster_refine_iterations, + "global_iterations": mesh_cluster_global_iterations, + "smooth_strength": mesh_cluster_smooth_strength, + }, + return_vmaps=True, + verbose=verbose, + ) + out_vertices = out_vertices.cuda() + out_faces = out_faces.cuda() + out_uvs = out_uvs.cuda() + out_vmaps = out_vmaps.cuda() + mesh.compute_vertex_normals() + out_normals = mesh.read_vertex_normals()[out_vmaps] + + if use_tqdm: + pbar.update(1) + if verbose: + print("Done") + + # --- Texture Baking (Attribute Sampling) --- + if use_tqdm: + pbar.set_description("Sampling attributes") + if verbose: + print("Sampling attributes...", end='', flush=True) + + # Setup differentiable rasterizer context + ctx = dr.RasterizeCudaContext() + # Prepare UV coordinates for rasterization (rendering in UV space) + uvs_rast = torch.cat([out_uvs * 2 - 1, torch.zeros_like(out_uvs[:, :1]), torch.ones_like(out_uvs[:, :1])], dim=-1).unsqueeze(0) + rast = torch.zeros((1, texture_size, texture_size, 4), device='cuda', dtype=torch.float32) + + # Rasterize in chunks to save memory + for i in range(0, out_faces.shape[0], 100000): + rast_chunk, _ = dr.rasterize( + ctx, uvs_rast, out_faces[i:i+100000], + resolution=[texture_size, texture_size], + ) + mask_chunk = rast_chunk[..., 3:4] > 0 + rast_chunk[..., 3:4] += i # Store face ID in alpha channel + rast = torch.where(mask_chunk, rast_chunk, rast) + + # Mask of valid pixels in texture + mask = rast[0, ..., 3] > 0 + + # Interpolate 3D positions in UV space (finding 3D coord for every texel) + pos = dr.interpolate(out_vertices.unsqueeze(0), rast, out_faces)[0][0] + valid_pos = pos[mask] + + # Map these 
positions back to the *original* high-res mesh to get accurate attributes + # This corrects geometric errors introduced by simplification/remeshing + _, face_id, uvw = bvh.unsigned_distance(valid_pos, return_uvw=True) + orig_tri_verts = vertices[faces[face_id.long()]] # (N_new, 3, 3) + valid_pos = (orig_tri_verts * uvw.unsqueeze(-1)).sum(dim=1) + + # Trilinear sampling from the attribute volume (Color, Material props) + attrs = torch.zeros(texture_size, texture_size, attr_volume.shape[1], device='cuda') + attrs[mask] = grid_sample_3d( + attr_volume, + torch.cat([torch.zeros_like(coords[:, :1]), coords], dim=-1), + shape=torch.Size([1, attr_volume.shape[1], *grid_size.tolist()]), + grid=((valid_pos - aabb[0]) / voxel_size).reshape(1, -1, 3), + mode='trilinear', + ) + if use_tqdm: + pbar.update(1) + if verbose: + print("Done") + + # --- Texture Post-Processing & Material Construction --- + if use_tqdm: + pbar.set_description("Finalizing mesh") + if verbose: + print("Finalizing mesh...", end='', flush=True) + + mask = mask.cpu().numpy() + + # Extract channels based on layout (BaseColor, Metallic, Roughness, Alpha) + base_color = np.clip(attrs[..., attr_layout['base_color']].cpu().numpy() * 255, 0, 255).astype(np.uint8) + metallic = np.clip(attrs[..., attr_layout['metallic']].cpu().numpy() * 255, 0, 255).astype(np.uint8) + roughness = np.clip(attrs[..., attr_layout['roughness']].cpu().numpy() * 255, 0, 255).astype(np.uint8) + alpha = np.clip(attrs[..., attr_layout['alpha']].cpu().numpy() * 255, 0, 255).astype(np.uint8) + alpha_mode = 'OPAQUE' + + # Inpainting: fill gaps (dilation) to prevent black seams at UV boundaries + mask_inv = (~mask).astype(np.uint8) + base_color = cv2.inpaint(base_color, mask_inv, 3, cv2.INPAINT_TELEA) + metallic = cv2.inpaint(metallic, mask_inv, 1, cv2.INPAINT_TELEA)[..., None] + roughness = cv2.inpaint(roughness, mask_inv, 1, cv2.INPAINT_TELEA)[..., None] + alpha = cv2.inpaint(alpha, mask_inv, 1, cv2.INPAINT_TELEA)[..., None] + + # Create PBR 
material + # Standard PBR packs Metallic and Roughness into Blue and Green channels + material = trimesh.visual.material.PBRMaterial( + baseColorTexture=Image.fromarray(np.concatenate([base_color, alpha], axis=-1)), + baseColorFactor=np.array([255, 255, 255, 255], dtype=np.uint8), + metallicRoughnessTexture=Image.fromarray(np.concatenate([np.zeros_like(metallic), roughness, metallic], axis=-1)), + metallicFactor=1.0, + roughnessFactor=1.0, + alphaMode=alpha_mode, + doubleSided=True if not remesh else False, + ) + + # --- Coordinate System Conversion & Final Object --- + vertices_np = out_vertices.cpu().numpy() + faces_np = out_faces.cpu().numpy() + uvs_np = out_uvs.cpu().numpy() + normals_np = out_normals.cpu().numpy() + + # Swap Y and Z axes, invert Y (common conversion for GLB compatibility) + vertices_np[:, 1], vertices_np[:, 2] = vertices_np[:, 2], -vertices_np[:, 1] + normals_np[:, 1], normals_np[:, 2] = normals_np[:, 2], -normals_np[:, 1] + uvs_np[:, 1] = 1 - uvs_np[:, 1] # Flip UV V-coordinate + + textured_mesh = trimesh.Trimesh( + vertices=vertices_np, + faces=faces_np, + vertex_normals=normals_np, + process=False, + visual=trimesh.visual.TextureVisuals(uv=uvs_np, material=material) + ) + + if use_tqdm: + pbar.update(1) + pbar.close() + if verbose: + print("Done") + + return textured_mesh \ No newline at end of file diff --git a/o-voxel/o_voxel/rasterize.py b/o-voxel/o_voxel/rasterize.py new file mode 100644 index 0000000000000000000000000000000000000000..63ae53b61e0cb501eb274b342bc5d337adfabfee --- /dev/null +++ b/o-voxel/o_voxel/rasterize.py @@ -0,0 +1,111 @@ +import torch +import torch.nn.functional as F +from easydict import EasyDict as edict +from . 
def intrinsics_to_projection(
        intrinsics: torch.Tensor,
        near: float,
        far: float,
    ) -> torch.Tensor:
    """
    OpenCV intrinsics to OpenGL perspective matrix

    Args:
        intrinsics (torch.Tensor): [3, 3] OpenCV intrinsics matrix
            NOTE(review): the 2 * fx / 2 * cx - 1 terms look like they expect
            intrinsics normalized by the image size - confirm against callers.
        near (float): near plane to clip
        far (float): far plane to clip
    Returns:
        (torch.Tensor): [4, 4] OpenGL perspective matrix
    """
    fx, fy = intrinsics[0, 0], intrinsics[1, 1]
    cx, cy = intrinsics[0, 2], intrinsics[1, 2]
    ret = torch.zeros((4, 4), dtype=intrinsics.dtype, device=intrinsics.device)
    ret[0, 0] = 2 * fx
    ret[1, 1] = 2 * fy
    ret[0, 2] = 2 * cx - 1
    ret[1, 2] = - 2 * cy + 1
    ret[2, 2] = far / (far - near)
    ret[2, 3] = near * far / (near - far)
    ret[3, 2] = 1.
    return ret


class VoxelRenderer:
    """
    Renderer for the Voxel representation.

    Args:
        rendering_options (dict): Rendering options. Recognized keys are
            "resolution" (required before rendering), "near", "far" and "ssaa".
    """

    def __init__(self, rendering_options=None) -> None:
        self.rendering_options = edict({
            "resolution": None,
            "near": 0.1,
            "far": 10.0,
            "ssaa": 1,
        })
        # None instead of a shared `{}` default; an omitted argument keeps the defaults
        if rendering_options:
            self.rendering_options.update(rendering_options)

    def render(
            self,
            position: torch.Tensor,
            attrs: torch.Tensor,
            voxel_size: float,
            extrinsics: torch.Tensor,
            intrinsics: torch.Tensor,
        ) -> edict:
        """
        Render the octree.

        Args:
            position (torch.Tensor): (N, 3) xyz positions
            attrs (torch.Tensor): (N, C) attributes
            voxel_size (float): voxel size
            extrinsics (torch.Tensor): (4, 4) camera extrinsics
            intrinsics (torch.Tensor): (3, 3) camera intrinsics

        Returns:
            edict containing:
                attr (torch.Tensor): (C, H, W) rendered color
                depth (torch.Tensor): (H, W) rendered depth
                alpha (torch.Tensor): (H, W) rendered alpha

        Raises:
            ValueError: if the "resolution" rendering option has not been set.
        """
        resolution = self.rendering_options["resolution"]
        near = self.rendering_options["near"]
        far = self.rendering_options["far"]
        ssaa = self.rendering_options["ssaa"]
        if resolution is None:
            # Fail with a clear message instead of a `None * int` TypeError below
            raise ValueError("rendering_options['resolution'] must be set before rendering")

        view = extrinsics
        perspective = intrinsics_to_projection(intrinsics, near, far)
        # Translation column of the inverted view matrix
        camera = torch.inverse(view)[:3, 3]
        focalx = intrinsics[0, 0]
        focaly = intrinsics[1, 1]
        # The rasterizer renders at the supersampled resolution
        args = (
            position,
            attrs,
            voxel_size,
            view.T.contiguous(),
            (perspective @ view).T.contiguous(),
            camera,
            0.5 / focalx,
            0.5 / focaly,
            resolution * ssaa,
            resolution * ssaa,
        )
        color, depth, alpha = _C.rasterize_voxels_cuda(*args)

        if ssaa > 1:
            # Downsample to the requested resolution. Only the axes added for
            # F.interpolate are removed again; a bare squeeze() would also drop a
            # single attribute channel or a size-1 image axis.
            size = (resolution, resolution)
            color = F.interpolate(color[None], size=size, mode='bilinear', align_corners=False, antialias=True)[0]
            depth = F.interpolate(depth[None, None], size=size, mode='bilinear', align_corners=False, antialias=True)[0, 0]
            alpha = F.interpolate(alpha[None, None], size=size, mode='bilinear', align_corners=False, antialias=True)[0, 0]

        ret = edict({
            'attr': color,
            'depth': depth,
            'alpha': alpha,
        })
        return ret
def _normalize_permute(permute: Sequence[int]) -> List[int]:
    """Return `permute` as a list after checking it is a permutation of [0, 1, 2]."""
    order = list(permute)
    if sorted(order) != [0, 1, 2]:
        raise ValueError(f"permute must be a permutation of [0, 1, 2], got {permute}")
    return order


@torch.no_grad()
def encode_seq(coords: torch.Tensor, permute: Sequence[int] = (0, 1, 2), mode: Literal['z_order', 'hilbert'] = 'z_order') -> torch.Tensor:
    """
    Encodes 3D coordinates into a 30-bit code.

    Args:
        coords: a tensor of shape [N, 3] containing the 3D coordinates.
        permute: the permutation of the coordinates; must contain 0, 1 and 2 exactly once.
        mode: the encoding mode to use.

    Returns:
        torch.Tensor: the codes produced by the `{mode}_encode_{device}` extension kernel.

    Raises:
        ValueError: if `permute` is not a permutation of [0, 1, 2], `mode` is unknown,
            or the tensor lives on a device other than cpu / cuda.
    """
    assert coords.shape[-1] == 3 and coords.ndim == 2, "Input coordinates must be of shape [N, 3]"
    order = _normalize_permute(permute)
    x = coords[:, order[0]].int()
    y = coords[:, order[1]].int()
    z = coords[:, order[2]].int()
    if mode not in ('z_order', 'hilbert'):
        raise ValueError(f"Unknown encoding mode: {mode}")
    device_type = coords.device.type
    if device_type not in ('cpu', 'cuda'):
        raise ValueError(f"Unsupported device type: {device_type}")
    # Resolves to z_order_encode_cpu / z_order_encode_cuda / hilbert_encode_cpu / hilbert_encode_cuda
    kernel = getattr(_C, f"{mode}_encode_{device_type}")
    return kernel(x, y, z)


@torch.no_grad()
def decode_seq(code: torch.Tensor, permute: Sequence[int] = (0, 1, 2), mode: Literal['z_order', 'hilbert'] = 'z_order') -> torch.Tensor:
    """
    Decodes a 30-bit code into 3D coordinates.

    Args:
        code: a tensor of shape [N] containing the 30-bit code.
        permute: the permutation of the coordinates that was used for encoding;
            must contain 0, 1 and 2 exactly once.
        mode: the decoding mode to use.

    Returns:
        torch.Tensor: the x, y, z components stacked along the last dimension.

    Raises:
        ValueError: if `permute` is not a permutation of [0, 1, 2], `mode` is unknown,
            or the tensor lives on a device other than cpu / cuda.
    """
    assert code.ndim == 1, "Input code must be of shape [N]"
    order = _normalize_permute(permute)
    if mode not in ('z_order', 'hilbert'):
        raise ValueError(f"Unknown decoding mode: {mode}")
    device_type = code.device.type
    if device_type not in ('cpu', 'cuda'):
        raise ValueError(f"Unsupported device type: {device_type}")
    # Resolves to z_order_decode_cpu / z_order_decode_cuda / hilbert_decode_cpu / hilbert_decode_cuda
    kernel = getattr(_C, f"{mode}_decode_{device_type}")
    coords = kernel(code)
    # Undo the encoding permutation: axis k was stored at position order.index(k)
    x = coords[order.index(0)]
    y = coords[order.index(1)]
    z = coords[order.index(2)]
    return torch.stack([x, y, z], dim=-1)
from setuptools import setup
from torch.utils.cpp_extension import CUDAExtension, BuildExtension, IS_HIP_EXTENSION
import os
import sys

ROOT = os.path.dirname(os.path.abspath(__file__))

# BUILD_TARGET selects the GPU toolchain: "auto" follows the installed PyTorch build,
# "cuda" / "rocm" force one explicitly.
BUILD_TARGET = os.environ.get("BUILD_TARGET", "auto")

if BUILD_TARGET == "auto":
    IS_HIP = bool(IS_HIP_EXTENSION)
elif BUILD_TARGET == "cuda":
    IS_HIP = False
elif BUILD_TARGET == "rocm":
    IS_HIP = True
else:
    # Without this branch IS_HIP stays undefined and the build dies later with a NameError
    raise ValueError(
        f"Unsupported BUILD_TARGET {BUILD_TARGET!r}; expected 'auto', 'cuda' or 'rocm'"
    )

if not IS_HIP:
    cc_flag = []
else:
    # GPU_ARCHS is a ';'-separated list; empty entries (e.g. a trailing ';') are skipped
    # so that no bare "--offload-arch=" flag is emitted.
    archs = [arch for arch in os.getenv("GPU_ARCHS", "native").split(";") if arch]
    cc_flag = [f"--offload-arch={arch}" for arch in archs]

# Platform-specific compiler flags
IS_WINDOWS = sys.platform == "win32"

if IS_WINDOWS:
    # MSVC flags
    # Use C++20 for better std::byte handling
    cxx_flags = ["/O2", "/std:c++20"]
    nvcc_flags = ["-O3", "-std=c++20"] + cc_flag
else:
    # GCC/Clang flags
    cxx_flags = ["-O3", "-std=c++17"]
    nvcc_flags = ["-O3", "-std=c++17"] + cc_flag

setup(
    name="o_voxel",
    packages=[
        'o_voxel',
        'o_voxel.convert',
        'o_voxel.io',
    ],
    ext_modules=[
        CUDAExtension(
            name="o_voxel._C",
            sources=[
                # Hashmap functions
                "src/hash/hash.cu",
                # Convert functions
                "src/convert/flexible_dual_grid.cpp",
                "src/convert/volumetic_attr.cpp",
                ## Serialization functions
                "src/serialize/api.cu",
                "src/serialize/hilbert.cu",
                "src/serialize/z_order.cu",
                # IO functions
                "src/io/svo.cpp",
                "src/io/filter_parent.cpp",
                "src/io/filter_neighbor.cpp",
                # Rasterization functions
                "src/rasterize/rasterize.cu",

                # main
                "src/ext.cpp",
            ],
            include_dirs=[
                os.path.join(ROOT, "third_party/eigen"),
            ],
            extra_compile_args={
                "cxx": cxx_flags,
                "nvcc": nvcc_flags,
            }
        )
    ],
    cmdclass={
        'build_ext': BuildExtension
    }
)
+ */ +std::tuple mesh_to_flexible_dual_grid_cpu( + const torch::Tensor& vertices, + const torch::Tensor& faces, + const torch::Tensor& voxel_size, + const torch::Tensor& grid_range, + float face_weight, + float boundary_weight, + float regularization_weight, + bool timing +); + + +/** + * Voxelizes a triangle mesh with PBR materials + * + * @param voxel_size [3] tensor containing the size of a voxel + * @param grid_range [6] tensor containing the size of the grid + * @param vertices [N_tri, 3, 3] array containing the triangle vertices + * @param normals [N_tri, 3, 3] array containing the triangle vertex normals + * @param uvs [N_tri, 3, 2] tensor containing the texture coordinates + * @param materialIds [N_tri] tensor containing the material ids + * @param baseColorFactor list of [3] tensor containing the base color factor + * @param baseColorTexture list of [H, W, 3] tensor containing the base color texture + * @param baseColorTextureFilter list of int indicating the base color texture filter (0: NEAREST, 1: LINEAR) + * @param baseColorTextureWrap list of int indicating the base color texture wrap (0: REPEAT, 1: CLAMP_TO_EDGE, 2: MIRRORED_REPEAT) + * @param metallicFactor list of float containing the metallic factor + * @param metallicTexture list of [H, W] tensor containing the metallic texture + * @param metallicTextureFilter list of int indicating the metallic texture filter (0: NEAREST, 1: LINEAR) + * @param metallicTextureWrap list of int indicating the metallic texture wrap (0: REPEAT, 1: CLAMP_TO_EDGE, 2: MIRRORED_REPEAT) + * @param roughnessFactor list of float containing the roughness factor + * @param roughnessTexture list of [H, W] tensor containing the roughness texture + * @param roughnessTextureFilter list of int indicating the roughness texture filter (0: NEAREST, 1: LINEAR) + * @param roughnessTextureWrap list of int indicating the roughness texture wrap (0: REPEAT, 1: CLAMP_TO_EDGE, 2: MIRRORED_REPEAT) + * @param emissiveFactor list of [3] tensor 
containing the emissive factor + * @param emissiveTexture list of [H, W, 3] tensor containing the emissive texture + * @param emissiveTextureFilter list of int indicating the emissive texture filter (0: NEAREST, 1: LINEAR) + * @param emissiveTextureWrap list of int indicating the emissive texture wrap (0: REPEAT, 1: CLAMP_TO_EDGE, 2: MIRRORED_REPEAT) + * @param alphaMode list of int indicating the alpha mode (0: OPAQUE, 1: MASK, 2: BLEND) + * @param alphaCutoff list of float containing the alpha cutoff + * @param alphaFactor list of float containing the alpha factor + * @param alphaTexture list of [H, W] tensor containing the alpha texture + * @param alphaTextureFilter list of int indicating the alpha texture filter (0: NEAREST, 1: LINEAR) + * @param alphaTextureWrap list of int indicating the alpha texture wrap (0: REPEAT, 1: CLAMP_TO_EDGE, 2: MIRRORED_REPEAT) + * @param normalTexture list of [H, W, 3] tensor containing the normal texture + * @param normalTextureFilter list of int indicating the normal texture filter (0: NEAREST, 1: LINEAR) + * @param normalTextureWrap list of int indicating the normal texture wrap (0: REPEAT, 1: CLAMP_TO_EDGE, 2: MIRRORED_REPEAT) + * @param mipLevelOffset float indicating the mip level offset for texture mipmap + * + * @return tuple containing: + * - coords: tensor of shape [N, 3] containing the voxel coordinates + * - out_baseColor: tensor of shape [N, 3] containing the base color of each voxel + * - out_metallic: tensor of shape [N, 1] containing the metallic of each voxel + * - out_roughness: tensor of shape [N, 1] containing the roughness of each voxel + * - out_emissive: tensor of shape [N, 3] containing the emissive of each voxel + * - out_alpha: tensor of shape [N, 1] containing the alpha of each voxel + * - out_normal: tensor of shape [N, 3] containing the normal of each voxel + */ +std::tuple +textured_mesh_to_volumetric_attr_cpu( + const torch::Tensor& voxel_size, + const torch::Tensor& grid_range, + const 
torch::Tensor& vertices, + const torch::Tensor& normals, + const torch::Tensor& uvs, + const torch::Tensor& materialIds, + const std::vector& baseColorFactor, + const std::vector& baseColorTexture, + const std::vector& baseColorTextureFilter, + const std::vector& baseColorTextureWrap, + const std::vector& metallicFactor, + const std::vector& metallicTexture, + const std::vector& metallicTextureFilter, + const std::vector& metallicTextureWrap, + const std::vector& roughnessFactor, + const std::vector& roughnessTexture, + const std::vector& roughnessTextureFilter, + const std::vector& roughnessTextureWrap, + const std::vector& emissiveFactor, + const std::vector& emissiveTexture, + const std::vector& emissiveTextureFilter, + const std::vector& emissiveTextureWrap, + const std::vector& alphaMode, + const std::vector& alphaCutoff, + const std::vector& alphaFactor, + const std::vector& alphaTexture, + const std::vector& alphaTextureFilter, + const std::vector& alphaTextureWrap, + const std::vector& normalTexture, + const std::vector& normalTextureFilter, + const std::vector& normalTextureWrap, + const float mipLevelOffset, + const bool timing +); diff --git a/o-voxel/src/convert/flexible_dual_grid.cpp b/o-voxel/src/convert/flexible_dual_grid.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0e6cb805d13ce2654e6ffa93315a7456eeabf498 --- /dev/null +++ b/o-voxel/src/convert/flexible_dual_grid.cpp @@ -0,0 +1,775 @@ +#include +#include +#include +#include +#include + +#include "api.h" + + +constexpr size_t kInvalidIndex = std::numeric_limits::max(); + + +struct float3 {float x, y, z; float& operator[](int i) {return (&x)[i];}}; +struct int3 {int x, y, z; int& operator[](int i) {return (&x)[i];}}; +struct int4 {int x, y, z, w; int& operator[](int i) {return (&x)[i];}}; +struct bool3 {bool x, y, z; bool& operator[](int i) {return (&x)[i];}}; + + +template +static inline U lerp(const T& a, const T& b, const T& t, const U& val_a, const U& val_b) { + if (a 
== b) return val_a; // Avoid divide by zero + T alpha = (t - a) / (b - a); + return (1 - alpha) * val_a + alpha * val_b; +} + + +template +static auto get_or_default(const Map& map, const Key& key, const Default& default_val) -> typename Map::mapped_type { + auto it = map.find(key); + return (it != map.end()) ? it->second : default_val; +} + + +// 3D voxel coordinate +struct VoxelCoord { + int x, y, z; + + int& operator[](int i) { + return (&x)[i]; + } + + bool operator==(const VoxelCoord& other) const { + return x == other.x && y == other.y && z == other.z; + } +}; + +// Hash function for VoxelCoord to use in unordered_map +namespace std { +template <> +struct hash { + size_t operator()(const VoxelCoord& v) const { + const std::size_t p1 = 73856093; + const std::size_t p2 = 19349663; + const std::size_t p3 = 83492791; + return (std::size_t)(v.x) * p1 ^ (std::size_t)(v.y) * p2 ^ (std::size_t)(v.z) * p3; + } +}; +} + + +void intersect_qef( + const Eigen::Vector3f& voxel_size, + const Eigen::Vector3i& grid_min, + const Eigen::Vector3i& grid_max, + const std::vector& triangles, // 3 vertices per triangle + std::unordered_map& hash_table, // Hash table for voxel lookup + std::vector& voxels, // Output: Voxel coordinates + std::vector& means, // Output: Mean vertex positions for each voxel + std::vector& cnt, // Output: Number of intersections for each voxel + std::vector& intersected, // Output: Whether edge of voxel intersects with triangle + std::vector& qefs // Output: QEF matrices for each voxel +) { + const size_t N_tri = triangles.size() / 3; + + for (size_t i = 0; i < N_tri; ++i) { + const Eigen::Vector3f& v0 = triangles[i * 3 + 0]; + const Eigen::Vector3f& v1 = triangles[i * 3 + 1]; + const Eigen::Vector3f& v2 = triangles[i * 3 + 2]; + + // Compute edge vectors and face normal + Eigen::Vector3f e0 = v1 - v0; + Eigen::Vector3f e1 = v2 - v1; + Eigen::Vector3f n = e0.cross(e1).normalized(); + Eigen::Vector4f plane; + plane << n.x(), n.y(), n.z(), -n.dot(v0); + 
auto Q = plane * plane.transpose(); + + // Scan-line algorithm to find intersections with the voxel grid from three directions + /* + t0 + | \ + | t1 + | / + t2 + */ + auto scan_line_fill = [&] (const int ax2) { + int ax0 = (ax2 + 1) % 3; + int ax1 = (ax2 + 2) % 3; + + // Canonical question + std::array t = { + Eigen::Vector3d(v0[ax0], v0[ax1], v0[ax2]), + Eigen::Vector3d(v1[ax0], v1[ax1], v1[ax2]), + Eigen::Vector3d(v2[ax0], v2[ax1], v2[ax2]) + }; + std::sort(t.begin(), t.end(), [](const Eigen::Vector3d& a, const Eigen::Vector3d& b) { return a.y() < b.y(); }); + + // Scan-line algorithm + int start = std::clamp(int(t[0].y() / voxel_size[ax1]), grid_min[ax1], grid_max[ax1] - 1); + int mid = std::clamp(int(t[1].y() / voxel_size[ax1]), grid_min[ax1], grid_max[ax1] - 1); + int end = std::clamp(int(t[2].y() / voxel_size[ax1]), grid_min[ax1], grid_max[ax1] - 1); + + auto scan_line_half = [&] (const int row_start, const int row_end, const Eigen::Vector3d t0, const Eigen::Vector3d t1, const Eigen::Vector3d t2) { + /* + t0 + | \ + t3-t4 + | \ + t1---t2 + */ + for (int y_idx = row_start; y_idx < row_end; ++y_idx) { + double y = (y_idx + 1) * voxel_size[ax1]; + Eigen::Vector2d t3 = lerp(t0.y(), t1.y(), y, Eigen::Vector2d(t0.x(), t0.z()), Eigen::Vector2d(t1.x(), t1.z())); + Eigen::Vector2d t4 = lerp(t0.y(), t2.y(), y, Eigen::Vector2d(t0.x(), t0.z()), Eigen::Vector2d(t2.x(), t2.z())); + if (t3.x() > t4.x()) std::swap(t3, t4); + int line_start = std::clamp(int(t3.x() / voxel_size[ax0]), grid_min[ax0], grid_max[ax0] - 1); + int line_end = std::clamp(int(t4.x() / voxel_size[ax0]), grid_min[ax0], grid_max[ax0] - 1); + for (int x_idx = line_start; x_idx < line_end; ++x_idx) { + double x = (x_idx + 1) * voxel_size[ax0]; + double z = lerp(t3.x(), t4.x(), x, t3.y(), t4.y()); + int z_idx = int(z / voxel_size[ax2]); + if (z_idx >= grid_min[ax2] && z_idx < grid_max[ax2]) { + // For 4-connected voxels + for (int dx = 0; dx < 2; ++dx) { + for (int dy = 0; dy < 2; ++dy) { + VoxelCoord 
coord; + coord[ax0] = x_idx + dx; coord[ax1] = y_idx + dy; coord[ax2] = z_idx; + Eigen::Vector3d intersect; + intersect[ax0] = x; intersect[ax1] = y; intersect[ax2] = z; + auto kv = hash_table.find(coord); + if (kv == hash_table.end()) { + hash_table[coord] = voxels.size(); + voxels.push_back({coord.x, coord.y, coord.z}); + means.push_back(intersect.cast()); + cnt.push_back(1); + intersected.push_back({false, false, false}); + qefs.push_back(Q); + if (dx == 0 && dy == 0) + intersected.back()[ax2] = true; + } + else { + auto i = kv->second; + means[i] += intersect.cast(); + cnt[i] += 1; + if (dx == 0 && dy == 0) + intersected[i][ax2] = true; + qefs[i] += Q; + } + } + } + } + } + } + }; + scan_line_half(start, mid, t[0], t[1], t[2]); + scan_line_half(mid, end, t[2], t[1], t[0]); + }; + scan_line_fill(0); + scan_line_fill(1); + scan_line_fill(2); + } +} + + +void face_qef( + const Eigen::Vector3f& voxel_size, + const Eigen::Vector3i& grid_min, + const Eigen::Vector3i& grid_max, + const std::vector& triangles, // 3 vertices per triangle + std::unordered_map& hash_table, // Hash table for voxel lookup + std::vector& qefs // Output: QEF matrices for each voxel +) { + const size_t N_tri = triangles.size() / 3; + + for (size_t i = 0; i < N_tri; ++i) { + const Eigen::Vector3f& v0 = triangles[i * 3 + 0]; + const Eigen::Vector3f& v1 = triangles[i * 3 + 1]; + const Eigen::Vector3f& v2 = triangles[i * 3 + 2]; + + // Compute edge vectors and face normal + Eigen::Vector3f e0 = v1 - v0; + Eigen::Vector3f e1 = v2 - v1; + Eigen::Vector3f e2 = v0 - v2; + Eigen::Vector3f n = e0.cross(e1).normalized(); + Eigen::Vector4f plane; + plane << n.x(), n.y(), n.z(), -n.dot(v0); + auto Q = plane * plane.transpose(); + + // Compute triangle bounding box in voxel coordinates + Eigen::Vector3f bb_min_f = v0.cwiseMin(v1).cwiseMin(v2).cwiseQuotient(voxel_size); + Eigen::Vector3f bb_max_f = v0.cwiseMax(v1).cwiseMax(v2).cwiseQuotient(voxel_size); + + Eigen::Vector3i 
bb_min(std::max(static_cast(bb_min_f.x()), grid_min.x()), + std::max(static_cast(bb_min_f.y()), grid_min.y()), + std::max(static_cast(bb_min_f.z()), grid_min.z())); + Eigen::Vector3i bb_max(std::min(static_cast(bb_max_f.x() + 1), grid_max.x()), + std::min(static_cast(bb_max_f.y() + 1), grid_max.y()), + std::min(static_cast(bb_max_f.z() + 1), grid_max.z())); + + // Plane test setup + Eigen::Vector3f c( + n.x() > 0.0f ? voxel_size.x() : 0.0f, + n.y() > 0.0f ? voxel_size.y() : 0.0f, + n.z() > 0.0f ? voxel_size.z() : 0.0f + ); + float d1 = n.dot(c - v0); + float d2 = n.dot(voxel_size - c - v0); + + // XY plane projection test setup + int mul_xy = (n.z() < 0.0f) ? -1 : 1; + Eigen::Vector2f n_xy_e0(-mul_xy * e0.y(), mul_xy * e0.x()); + Eigen::Vector2f n_xy_e1(-mul_xy * e1.y(), mul_xy * e1.x()); + Eigen::Vector2f n_xy_e2(-mul_xy * e2.y(), mul_xy * e2.x()); + + float d_xy_e0 = -n_xy_e0.dot(v0.head<2>()) + n_xy_e0.cwiseMax(0.0f).dot(voxel_size.head<2>()); + float d_xy_e1 = -n_xy_e1.dot(v1.head<2>()) + n_xy_e1.cwiseMax(0.0f).dot(voxel_size.head<2>()); + float d_xy_e2 = -n_xy_e2.dot(v2.head<2>()) + n_xy_e2.cwiseMax(0.0f).dot(voxel_size.head<2>()); + + // YZ plane projection test setup + int mul_yz = (n.x() < 0.0f) ? -1 : 1; + Eigen::Vector2f n_yz_e0(-mul_yz * e0.z(), mul_yz * e0.y()); + Eigen::Vector2f n_yz_e1(-mul_yz * e1.z(), mul_yz * e1.y()); + Eigen::Vector2f n_yz_e2(-mul_yz * e2.z(), mul_yz * e2.y()); + + float d_yz_e0 = -n_yz_e0.dot(Eigen::Vector2f(v0.y(), v0.z())) + n_yz_e0.cwiseMax(0.0f).dot(Eigen::Vector2f(voxel_size.y(), voxel_size.z())); + float d_yz_e1 = -n_yz_e1.dot(Eigen::Vector2f(v1.y(), v1.z())) + n_yz_e1.cwiseMax(0.0f).dot(Eigen::Vector2f(voxel_size.y(), voxel_size.z())); + float d_yz_e2 = -n_yz_e2.dot(Eigen::Vector2f(v2.y(), v2.z())) + n_yz_e2.cwiseMax(0.0f).dot(Eigen::Vector2f(voxel_size.y(), voxel_size.z())); + + // ZX plane projection test setup + int mul_zx = (n.y() < 0.0f) ? 
-1 : 1; + Eigen::Vector2f n_zx_e0(-mul_zx * e0.x(), mul_zx * e0.z()); + Eigen::Vector2f n_zx_e1(-mul_zx * e1.x(), mul_zx * e1.z()); + Eigen::Vector2f n_zx_e2(-mul_zx * e2.x(), mul_zx * e2.z()); + + float d_zx_e0 = -n_zx_e0.dot(Eigen::Vector2f(v0.z(), v0.x())) + n_zx_e0.cwiseMax(0.0f).dot(Eigen::Vector2f(voxel_size.z(), voxel_size.x())); + float d_zx_e1 = -n_zx_e1.dot(Eigen::Vector2f(v1.z(), v1.x())) + n_zx_e1.cwiseMax(0.0f).dot(Eigen::Vector2f(voxel_size.z(), voxel_size.x())); + float d_zx_e2 = -n_zx_e2.dot(Eigen::Vector2f(v2.z(), v2.x())) + n_zx_e2.cwiseMax(0.0f).dot(Eigen::Vector2f(voxel_size.z(), voxel_size.x())); + + // Loop over candidate voxels inside bounding box + for (int z = bb_min.z(); z < bb_max.z(); ++z) { + for (int y = bb_min.y(); y < bb_max.y(); ++y) { + for (int x = bb_min.x(); x < bb_max.x(); ++x) { + // Voxel center + Eigen::Vector3f p = voxel_size.cwiseProduct(Eigen::Vector3f(x, y, z)); + + // Plane through box test + float nDOTp = n.dot(p); + if (((nDOTp + d1) * (nDOTp + d2)) > 0.0f) continue; + + // XY projection test + Eigen::Vector2f p_xy(p.x(), p.y()); + if (n_xy_e0.dot(p_xy) + d_xy_e0 < 0) continue; + if (n_xy_e1.dot(p_xy) + d_xy_e1 < 0) continue; + if (n_xy_e2.dot(p_xy) + d_xy_e2 < 0) continue; + + // YZ projection test + Eigen::Vector2f p_yz(p.y(), p.z()); + if (n_yz_e0.dot(p_yz) + d_yz_e0 < 0) continue; + if (n_yz_e1.dot(p_yz) + d_yz_e1 < 0) continue; + if (n_yz_e2.dot(p_yz) + d_yz_e2 < 0) continue; + + // ZX projection test + Eigen::Vector2f p_zx(p.z(), p.x()); + if (n_zx_e0.dot(p_zx) + d_zx_e0 < 0) continue; + if (n_zx_e1.dot(p_zx) + d_zx_e1 < 0) continue; + if (n_zx_e2.dot(p_zx) + d_zx_e2 < 0) continue; + + // Passed all tests — mark voxel + auto coord = VoxelCoord{x, y, z}; + auto kv = hash_table.find(coord); + if (kv != hash_table.end()) { + qefs[kv->second] += Q; + } + } + } + } + } +} + + +void boundry_qef( + const Eigen::Vector3f& voxel_size, + const Eigen::Vector3i& grid_min, + const Eigen::Vector3i& grid_max, + const 
std::vector& boundries, // 2 vertices per segment + const float boundary_weight, // Weight for boundary edges + std::unordered_map& hash_table, // Hash table for voxel lookup + std::vector& qefs // Output: QEF matrices for each voxel +) { + for (size_t i = 0; i < boundries.size() / 2; ++i) { + const Eigen::Vector3f& v0 = boundries[i * 2 + 0]; + const Eigen::Vector3f& v1 = boundries[i * 2 + 1]; + + // Calculate the QEF for the edge (boundary) defined by v0 and v1 + Eigen::Vector3d dir(v1.x() - v0.x(), v1.y() - v0.y(), v1.z() - v0.z()); + double segment_length = dir.norm(); + if (segment_length < 1e-6) continue; // Skip degenerate edges (zero-length) + dir.normalize(); // unit direction vector + + // Projection matrix orthogonal to the direction: I - d d^T + Eigen::Matrix3f A = Eigen::Matrix3f::Identity() - (dir * dir.transpose()).cast(); + + // b = -A * v0 + Eigen::Vector3f b = -A * v0; + + // c = v0^T * A * v0 + float c = v0.transpose() * A * v0; + + // Now pack this into a 4x4 QEF matrix + Eigen::Matrix4f Q = Eigen::Matrix4f::Zero(); + Q.block<3, 3>(0, 0) = A; + Q.block<3, 1>(0, 3) = b; + Q.block<1, 3>(3, 0) = b.transpose(); + Q(3, 3) = c; + + // DDA Traversal logic directly inside the function + + // Starting and ending voxel coordinates + Eigen::Vector3i v0_voxel = (v0.cwiseQuotient(voxel_size)).array().floor().cast(); + Eigen::Vector3i v1_voxel = (v1.cwiseQuotient(voxel_size)).array().floor().cast(); + + // Determine step direction for each axis based on the line direction + Eigen::Vector3i step = (dir.array() > 0).select(Eigen::Vector3i(1, 1, 1), Eigen::Vector3i(-1, -1, -1)); + + Eigen::Vector3d tMax, tDelta; + for (int axis = 0; axis < 3; ++axis) { + if (dir[axis] == 0.0) { + tMax[axis] = std::numeric_limits::infinity(); + tDelta[axis] = std::numeric_limits::infinity(); + } else { + float voxel_border = voxel_size[axis] * (v0_voxel[axis] + (step[axis] > 0 ? 
1 : 0)); + tMax[axis] = (voxel_border - v0[axis]) / dir[axis]; + tDelta[axis] = voxel_size[axis] / std::abs(dir[axis]); + } + } + + // Current voxel position + Eigen::Vector3i current = v0_voxel; + + // Store the voxel we start at + std::vector voxels; + voxels.push_back({current.x(), current.y(), current.z()}); + + // Traverse the voxels + while (true) { + int axis; + if (tMax.x() < tMax.y()) { + axis = (tMax.x() < tMax.z()) ? 0 : 2; + } else { + axis = (tMax.y() < tMax.z()) ? 1 : 2; + } + + if (tMax[axis] > segment_length) break; + + current[axis] += step[axis]; + tMax[axis] += tDelta[axis]; + + voxels.push_back({current.x(), current.y(), current.z()}); + } + + // Accumulate QEF for each voxel passed through + for (const auto& coord : voxels) { + // Make sure the voxel is within bounds + if ((coord.x < grid_min.x() || coord.x >= grid_max.x()) || + (coord.y < grid_min.y() || coord.y >= grid_max.y()) || + (coord.z < grid_min.z() || coord.z >= grid_max.z())) continue; + if (!hash_table.count(coord)) continue; // Skip if voxel not in hash table + + // Accumulate the QEF for this voxel + qefs[hash_table[coord]] += boundary_weight * Q; // Scale by boundary weight + } + } +} + + +std::array quad_to_2tri( + const std::vector& vertices, + const int4& quad_indices +) { + int ia = quad_indices.x; + int ib = quad_indices.y; + int ic = quad_indices.z; + int id = quad_indices.w; + + Eigen::Vector3f a(vertices[ia].x, vertices[ia].y, vertices[ia].z); + Eigen::Vector3f b(vertices[ib].x, vertices[ib].y, vertices[ib].z); + Eigen::Vector3f c(vertices[ic].x, vertices[ic].y, vertices[ic].z); + Eigen::Vector3f d(vertices[id].x, vertices[id].y, vertices[id].z); + + // diagonal AC + Eigen::Vector3f n_abc = (b - a).cross(c - a).normalized(); + Eigen::Vector3f n_acd = (c - a).cross(d - a).normalized(); + float angle_ac = std::acos(std::clamp(n_abc.dot(n_acd), -1.0f, 1.0f)); + + // diagonal BD + Eigen::Vector3f n_abd = (b - a).cross(d - a).normalized(); + Eigen::Vector3f n_bcd = (c - 
b).cross(d - b).normalized(); + float angle_bd = std::acos(std::clamp(n_abd.dot(n_bcd), -1.0f, 1.0f)); + + if (angle_ac <= angle_bd) { + return {int3{ia, ib, ic}, int3{ia, ic, id}}; + } else { + return {int3{ia, ib, id}, int3{ib, ic, id}}; + } +} + + +void face_from_dual_vertices( + const std::unordered_map& hash_table, + const std::vector& voxels, + const std::vector& dual_vertices, + const std::vector& intersected, + std::vector& face_indices +) { + for (int i = 0; i < dual_vertices.size(); ++i) { + int3 coord = voxels[i]; + bool3 is_intersected = intersected[i]; + + // Check existence of neighboring 6 voxels + size_t neigh_indices[6] = { + get_or_default(hash_table, VoxelCoord{coord.x + 1, coord.y, coord.z}, kInvalidIndex), + get_or_default(hash_table, VoxelCoord{coord.x, coord.y + 1, coord.z}, kInvalidIndex), + get_or_default(hash_table, VoxelCoord{coord.x + 1, coord.y + 1, coord.z}, kInvalidIndex), + get_or_default(hash_table, VoxelCoord{coord.x, coord.y, coord.z + 1}, kInvalidIndex), + get_or_default(hash_table, VoxelCoord{coord.x + 1, coord.y, coord.z + 1}, kInvalidIndex), + get_or_default(hash_table, VoxelCoord{coord.x, coord.y + 1, coord.z + 1}, kInvalidIndex) + }; + + // xy-plane + if (is_intersected[2] && neigh_indices[0] != kInvalidIndex && neigh_indices[1] != kInvalidIndex && neigh_indices[2] != kInvalidIndex) { + int4 quad_indices{i, neigh_indices[0], neigh_indices[2], neigh_indices[1]}; + auto tri_indices = quad_to_2tri(dual_vertices, quad_indices); + face_indices.insert(face_indices.end(), tri_indices.begin(), tri_indices.end()); + } + // yz-plane + if (is_intersected[0] && neigh_indices[1] != kInvalidIndex && neigh_indices[3] != kInvalidIndex && neigh_indices[5] != kInvalidIndex) { + int4 quad_indices{i, neigh_indices[1], neigh_indices[5], neigh_indices[3]}; + auto tri_indices = quad_to_2tri(dual_vertices, quad_indices); + face_indices.insert(face_indices.end(), tri_indices.begin(), tri_indices.end()); + } + // xz-plane + if (is_intersected[1] && 
neigh_indices[0] != kInvalidIndex && neigh_indices[3] != kInvalidIndex && neigh_indices[4] != kInvalidIndex) { + int4 quad_indices{i, neigh_indices[0], neigh_indices[4], neigh_indices[3]}; + auto tri_indices = quad_to_2tri(dual_vertices, quad_indices); + face_indices.insert(face_indices.end(), tri_indices.begin(), tri_indices.end()); + } + } +} + +/** + * Extract flexible dual grid from a triangle mesh. + * + * @param vertices: Tensor of shape (N, 3) containing vertex positions. + * @param faces: Tensor of shape (M, 3) containing triangle vertex indices. + * @param voxel_size: Tensor of shape (3,) containing the voxel size in each dimension. + * @param grid_range: Tensor of shape (2, 3) containing the minimum and maximum coordinates of the grid range. + * @param face_weight: Weight for the face edges in the QEF computation. + * @param boundary_weight: Weight for the boundary edges in the QEF computation. + * @param regularization_weight: Regularization factor to apply to the QEF matrices. + * @param timing: Boolean flag to indicate whether to print timing information. + * + * @return a tuple ((x, y, z), vertices, intersected, faces) containing the remeshed vertices and the corresponding voxel grid. 
+ */ +std::tuple mesh_to_flexible_dual_grid_cpu( + const torch::Tensor& vertices, + const torch::Tensor& faces, + const torch::Tensor& voxel_size, + const torch::Tensor& grid_range, + float face_weight, + float boundary_weight, + float regularization_weight, + bool timing +) { + const int F = faces.size(0); + const float* v_ptr = vertices.data_ptr(); + const int* f_ptr = faces.data_ptr(); + const float* voxel_size_ptr = voxel_size.data_ptr(); + const int* grid_range_ptr = grid_range.data_ptr(); + clock_t start, end; + std::unordered_map hash_table; + std::vector voxels; // Voxel coordinates + std::vector means; // Mean vertex positions for each voxel + std::vector cnt; // Number of intersections for each voxel + std::vector intersected; // Indicate whether edges of voxels intersect with surface + std::vector qefs; // QEF matrices for each voxel + + // Convert tensors to Eigen types + Eigen::Vector3f e_voxel_size(voxel_size_ptr[0], voxel_size_ptr[1], voxel_size_ptr[2]); + Eigen::Vector3i e_grid_min(grid_range_ptr[0], grid_range_ptr[1], grid_range_ptr[2]); + Eigen::Vector3i e_grid_max(grid_range_ptr[3], grid_range_ptr[4], grid_range_ptr[5]); + + // Intersect QEF computation + start = clock(); + std::vector triangles; + triangles.reserve(F * 3); + for (int f = 0; f < F; ++f) { + for (int v = 0; v < 3; ++v) { + triangles.push_back(Eigen::Vector3f( + v_ptr[f_ptr[f * 3 + v] * 3 + 0], + v_ptr[f_ptr[f * 3 + v] * 3 + 1], + v_ptr[f_ptr[f * 3 + v] * 3 + 2] + )); + } + } + intersect_qef(e_voxel_size, e_grid_min, e_grid_max, triangles, hash_table, voxels, means, cnt, intersected, qefs); + end = clock(); + if (timing) std::cout << "Intersect QEF computation took " << double(end - start) / CLOCKS_PER_SEC << " seconds." 
<< std::endl; + + // Face QEF computation + if (face_weight > 0.0f) { + start = clock(); + face_qef(e_voxel_size, e_grid_min, e_grid_max, triangles, hash_table, qefs); + end = clock(); + if (timing) std::cout << "Face QEF computation took " << double(end - start) / CLOCKS_PER_SEC << " seconds." << std::endl; + } + + // Boundary QEF computation + if (boundary_weight > 0.0f) { + start = clock(); + std::map, int> edge_count; + for (int f = 0; f < F; ++f) { + for (int v0 = 0; v0 < 3; ++v0) { + int e0 = f_ptr[f * 3 + v0]; + int e1 = f_ptr[f * 3 + (v0 + 1) % 3]; + if (e0 > e1) std::swap(e0, e1); + edge_count[std::make_pair(e0, e1)]++; + } + } + std::vector boundries; + for (const auto& e : edge_count) { + if (e.second == 1) { + int v0 = e.first.first; + int v1 = e.first.second; + boundries.push_back(Eigen::Vector3f( + v_ptr[v0 * 3 + 0], + v_ptr[v0 * 3 + 1], + v_ptr[v0 * 3 + 2] + )); + boundries.push_back(Eigen::Vector3f( + v_ptr[v1 * 3 + 0], + v_ptr[v1 * 3 + 1], + v_ptr[v1 * 3 + 2] + )); + } + } + boundry_qef(e_voxel_size, e_grid_min, e_grid_max, boundries, boundary_weight, hash_table, qefs); + end = clock(); + if (timing) std::cout << "Boundary QEF computation took " << double(end - start) / CLOCKS_PER_SEC << " seconds." 
<< std::endl; + } + + // Solve the QEF system to obtain final dual vertices + start = clock(); + std::vector dual_vertices(voxels.size()); + for (int i = 0; i < voxels.size(); ++i) { + int3 coord = voxels[i]; + Eigen::Matrix4f Q = qefs[i]; + float min_corner[3] = { + coord.x * e_voxel_size.x(), + coord.y * e_voxel_size.y(), + coord.z * e_voxel_size.z() + }; + float max_corner[3] = { + (coord.x + 1) * e_voxel_size.x(), + (coord.y + 1) * e_voxel_size.y(), + (coord.z + 1) * e_voxel_size.z() + }; + + // Add regularization term + if (regularization_weight > 0.0f) { + Eigen::Vector3f p = means[i] / cnt[i]; + + // Construct the QEF matrix for this vertex + Eigen::Matrix4f Qreg = Eigen::Matrix4f::Zero(); + Qreg.topLeftCorner<3,3>() = Eigen::Matrix3f::Identity(); + Qreg.block<3,1>(0,3) = -p; + Qreg.block<1,3>(3,0) = -p.transpose(); + Qreg(3,3) = p.dot(p); + + Q += regularization_weight * cnt[i] * Qreg; // Scale by regularization weight + } + + // Solve unconstrained + Eigen::Matrix3f A = Q.topLeftCorner<3, 3>(); + Eigen::Vector3f b = -Q.block<3, 1>(0, 3); + Eigen::Vector3f v_new = A.colPivHouseholderQr().solve(b); + + if (!( + v_new.x() >= min_corner[0] && v_new.x() <= max_corner[0] && + v_new.y() >= min_corner[1] && v_new.y() <= max_corner[1] && + v_new.z() >= min_corner[2] && v_new.z() <= max_corner[2] + )) { + // Starting enumeration of constraints + float best = std::numeric_limits::infinity(); + + // Solve single-constraint + auto solve_single_constraint = [&](int fixed_axis) { + int ax1 = (fixed_axis + 1) % 3; + int ax2 = (fixed_axis + 2) % 3; + + Eigen::Matrix2f A; + Eigen::Matrix2f B; + Eigen::Vector2f q, b, x; + + A << Q(ax1, ax1), Q(ax1, ax2), + Q(ax2, ax1), Q(ax2, ax2); + B << Q(ax1, fixed_axis), Q(ax1, 3), + Q(ax2, fixed_axis), Q(ax2, 3); + auto Asol = A.colPivHouseholderQr(); + + // if lower bound + q << min_corner[fixed_axis], 1; + b = -B * q; + x = Asol.solve(b); + if ( + x.x() >= min_corner[ax1] && x.x() <= max_corner[ax1] && + x.y() >= min_corner[ax2] && 
x.y() <= max_corner[ax2] + ) { + Eigen::Vector4f p; + p[fixed_axis] = min_corner[fixed_axis]; + p[ax1] = x.x(); + p[ax2] = x.y(); + p[3] = 1.0f; + float err = p.transpose() * Q * p; + if (err < best) { + best = err; + v_new << p[0], p[1], p[2]; + } + } + + // if upper bound + q << max_corner[fixed_axis], 1; + b = -B * q; + x = Asol.solve(b); + if ( + x.x() >= min_corner[ax1] && x.x() <= max_corner[ax1] && + x.y() >= min_corner[ax2] && x.y() <= max_corner[ax2] + ) { + Eigen::Vector4f p; + p[fixed_axis] = max_corner[fixed_axis]; + p[ax1] = x.x(); + p[ax2] = x.y(); + p[3] = 1.0f; + float err = p.transpose() * Q * p; + if (err < best) { + best = err; + v_new << p[0], p[1], p[2]; + } + } + }; + solve_single_constraint(0); // fix x + solve_single_constraint(1); // fix y + solve_single_constraint(2); // fix z + + // Solve two-constraint + auto solve_two_constraint = [&](int free_axis) { + int ax1 = (free_axis + 1) % 3; + int ax2 = (free_axis + 2) % 3; + + float a, x; + Eigen::Vector3f b, q; + + a = Q(free_axis, free_axis); + b << Q(free_axis, ax1), Q(free_axis, ax2), Q(free_axis, 3); + + // if lower-lower bound + q << min_corner[ax1], min_corner[ax2], 1; + x = -(b.dot(q)) / a; + if (x >= min_corner[free_axis] && x <= max_corner[free_axis]) { + Eigen::Vector4f p; + p[free_axis] = x; + p[ax1] = min_corner[ax1]; + p[ax2] = min_corner[ax2]; + p[3] = 1.0f; + float err = p.transpose() * Q * p; + if (err < best) { + best = err; + v_new << p[0], p[1], p[2]; + } + } + + // if lower-upper bound + q << min_corner[ax1], max_corner[ax2], 1; + x = -(b.dot(q)) / a; + if (x >= min_corner[free_axis] && x <= max_corner[free_axis]) { + Eigen::Vector4f p; + p[free_axis] = x; + p[ax1] = min_corner[ax1]; + p[ax2] = max_corner[ax2]; + p[3] = 1.0f; + float err = p.transpose() * Q * p; + if (err < best) { + best = err; + v_new << p[0], p[1], p[2]; + } + } + + // if upper-lower bound + q << max_corner[ax1], min_corner[ax2], 1; + x = -(b.dot(q)) / a; + if (x >= min_corner[free_axis] && x <= 
max_corner[free_axis]) { + Eigen::Vector4f p; + p[free_axis] = x; + p[ax1] = max_corner[ax1]; + p[ax2] = min_corner[ax2]; + p[3] = 1.0f; + float err = p.transpose() * Q * p; + if (err < best) { + best = err; + v_new << p[0], p[1], p[2]; + } + } + + // if upper-upper bound + q << max_corner[ax1], max_corner[ax2], 1; + x = -(b.dot(q)) / a; + if (x >= min_corner[free_axis] && x <= max_corner[free_axis]) { + Eigen::Vector4f p; + p[free_axis] = x; + p[ax1] = max_corner[ax1]; + p[ax2] = max_corner[ax2]; + p[3] = 1.0f; + float err = p.transpose() * Q * p; + if (err < best) { + best = err; + v_new << p[0], p[1], p[2]; + } + } + }; + solve_two_constraint(0); // free x + solve_two_constraint(1); // free y + solve_two_constraint(2); // free z + + // Solve three-constraint + for (int x_constraint = 0; x_constraint < 2; ++x_constraint) { + for (int y_constraint = 0; y_constraint < 2; ++y_constraint) { + for (int z_constraint = 0; z_constraint < 2; ++z_constraint) { + Eigen::Vector4f p; + p[0] = x_constraint ? min_corner[0] : max_corner[0]; + p[1] = y_constraint ? min_corner[1] : max_corner[1]; + p[2] = z_constraint ? min_corner[2] : max_corner[2]; + p[3] = 1.0f; + + float err = p.transpose() * Q * p; + if (err < best) { + best = err; + v_new << p[0], p[1], p[2]; + } + } + } + } + } + + // Store the dual vertex and voxel grid coordinates + dual_vertices[i] = float3{v_new.x(), v_new.y(), v_new.z()}; + } + end = clock(); + if (timing) std::cout << "Dual vertices computation took " << double(end - start) / CLOCKS_PER_SEC << " seconds." 
<< std::endl; + + return std::make_tuple( + torch::from_blob(voxels.data(), {int(voxels .size()), 3}, torch::kInt32).clone(), + torch::from_blob(dual_vertices.data(), {int(dual_vertices.size()), 3}, torch::kFloat32).clone(), + torch::from_blob(intersected.data(), {int(intersected.size()), 3}, torch::kBool).clone() + ); +} + diff --git a/o-voxel/src/convert/volumetic_attr.cpp b/o-voxel/src/convert/volumetic_attr.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9b9e94f143c4c544be443a9e80b7233f84673d33 --- /dev/null +++ b/o-voxel/src/convert/volumetic_attr.cpp @@ -0,0 +1,872 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "api.h" + + +constexpr size_t kInvalidIndex = std::numeric_limits::max(); + + +static bool is_power_of_two(int n) { + return n > 0 && (n & (n - 1)) == 0; +} + + +template +static inline U lerp(const T& a, const T& b, const T& t, const U& val_a, const U& val_b) { + if (a == b) return val_a; // Avoid divide by zero + T alpha = (t - a) / (b - a); + return (1 - alpha) * val_a + alpha * val_b; +} + + +template +static auto get_or_default(const Map& map, const Key& key, const Default& default_val) -> typename Map::mapped_type { + auto it = map.find(key); + return (it != map.end()) ? it->second : default_val; +} + + +// 3D voxel coordinate +struct VoxelCoord { + int x, y, z; + + int& operator[](int i) { + return (&x)[i]; + } + + bool operator==(const VoxelCoord& other) const { + return x == other.x && y == other.y && z == other.z; + } +}; + +// Hash function for VoxelCoord to use in unordered_map +namespace std { +template <> +struct hash { + size_t operator()(const VoxelCoord& v) const { + const std::size_t p1 = 73856093; + const std::size_t p2 = 19349663; + const std::size_t p3 = 83492791; + return (std::size_t)(v.x) * p1 ^ (std::size_t)(v.y) * p2 ^ (std::size_t)(v.z) * p3; + } +}; +} + + +/** + * Compute the Normal Tangent and Bitangent vectors for a triangle. 
+ * + * @param v0 The first vertex of the triangle. + * @param v1 The second vertex of the triangle. + * @param v2 The third vertex of the triangle. + * @param uv0 The texture coordinates of the first vertex. + * @param uv1 The texture coordinates of the second vertex. + * @param uv2 The texture coordinates of the third vertex. + * + * @return A tuple containing: + * - t The tangent vector. + * - b The bitangent vector. + * - n The normal vector. + * - mip_length The norms of the partial derivatives of the 3D coordinates with respect to the 2D texture coordinates. + */ +static std::tuple compute_TBN( + const Eigen::Vector3f& v0, + const Eigen::Vector3f& v1, + const Eigen::Vector3f& v2, + const Eigen::Vector2f& uv0, + const Eigen::Vector2f& uv1, + const Eigen::Vector2f& uv2 +) { + Eigen::Vector3f e1 = v1 - v0; + Eigen::Vector3f e2 = v2 - v0; + Eigen::Vector2f duv1 = uv1 - uv0; + Eigen::Vector2f duv2 = uv2 - uv0; + Eigen::Vector3f n = e1.cross(e2).normalized(); + + float det = duv1.x() * duv2.y() - duv1.y() * duv2.x(); + if (fabs(det) < 1e-6) { + // Use default + Eigen::Vector3f t(1.0f, 0.0f, 0.0f); + Eigen::Vector3f b(0.0f, 1.0f, 0.0f); + Eigen::Vector2f mip_length(1e6, 1e6); + return std::make_tuple(t, b, n, mip_length); + } + + float invDet = 1.0f / det; + Eigen::Vector3f t = (duv2.y() * e1 - duv1.y() * e2); + Eigen::Vector3f b = (duv1.x() * e2 - duv2.x() * e1); + float t_norm = t.norm(); + float b_norm = b.norm(); + t = t / t_norm; + b = b / b_norm; + Eigen::Vector2f mip_length(invDet * t_norm, invDet * b_norm); + + return std::make_tuple(t, b, n, mip_length); +} + + +/** + * Project a point onto a triangle defined by three vertices. + * + * @param p The point to project. + * @param a The first vertex of the triangle. + * @param b The second vertex of the triangle. + * @param c The third vertex of the triangle. + * @param n The normal of the triangle. + * + * @return The projected point represented as barycentric coordinates (u, v, w) and distance from the plane. 
+ */ +static Eigen::Vector4f project_onto_triangle( + const Eigen::Vector3f& p, + const Eigen::Vector3f& a, + const Eigen::Vector3f& b, + const Eigen::Vector3f& c, + const Eigen::Vector3f& n +) { + float d = (p - a).dot(n); + + Eigen::Vector3f p_proj = p - d * n; + Eigen::Vector3f ab = b - a; + Eigen::Vector3f ac = c - a; + Eigen::Vector3f ap = p_proj - a; + + float d00 = ab.dot(ab); + float d01 = ab.dot(ac); + float d11 = ac.dot(ac); + float d20 = ap.dot(ab); + float d21 = ap.dot(ac); + + float denom = d00 * d11 - d01 * d01; + float v = (d11 * d20 - d01 * d21) / denom; + float w = (d00 * d21 - d01 * d20) / denom; + float u = 1.0f - v - w; + + return Eigen::Vector4f(u, v, w, d); +} + + +static inline int wrap_texcoord(const int& x, const int& W, const int& filter) { + if (filter == 0) { // REPEAT + return (x % W + W) % W; + } else if (filter == 1) { // CLAMP_TO_EDGE + return std::max(0, std::min(x, W - 1)); + } else if (filter == 2) { // MIRROR_REPEAT + int period = 2 * W; + int x_mod = (x % period + period) % period; + return (x_mod < W) ? 
x_mod : (period - x_mod - 1); + } else { + // Default to repeat + return (x % W + W) % W; + } +} + + +static std::vector> build_mipmaps( + const uint8_t* texture, + const int& H, const int& W, const int& C +) { + if (H != W || !is_power_of_two(H)) { + throw std::invalid_argument("Texture width and height must be equal and a power of two."); + } + std::vector> mipmaps; + const uint8_t* cur_map = texture; + int cur_H = H; + int cur_W = W; + int next_H = cur_H >> 1; + int next_W = cur_W >> 1; + while (next_H > 0 && next_W > 0) { + std::vector next_map(next_H * next_W * C); + for (int y = 0; y < next_H; y++) { + for (int x = 0; x < next_W; x++) { + for (int c = 0; c < C; c++) { + size_t sum = 0; + size_t xx = static_cast(x) << 1; + size_t yy = static_cast(y) << 1; + sum += cur_map[yy * static_cast(cur_W) * C + xx * C + c]; + sum += cur_map[(yy + 1) * static_cast(cur_W) * C + xx * C + c]; + sum += cur_map[yy * static_cast(cur_W) * C + (xx + 1) * C + c]; + sum += cur_map[(yy + 1) * static_cast(cur_W) * C + (xx + 1) * C + c]; + next_map[y * next_W * C + x * C + c] = static_cast(sum / 4); + } + } + } + mipmaps.push_back(std::move(next_map)); + cur_map = mipmaps.back().data(); + cur_H = next_H; + cur_W = next_W; + next_H = cur_H >> 1; + next_W = cur_W >> 1; + } + return mipmaps; +} + + +static void sample_texture( + const uint8_t* texture, + const int& H, const int& W, const int& C, + const float& u, const float& v, + const int& filter, const int& wrap, + float* color +) { + float x = u * W; + float y = (1 - v) * H; + if (filter == 0) { // NEAREST + int x_int = floorf(x); + int y_int = floorf(y); + x_int = wrap_texcoord(x_int, W, wrap); + y_int = wrap_texcoord(y_int, H, wrap); + for (int c = 0; c < C; c++) { + color[c] = texture[y_int * W * C + x_int * C + c] / 255.0f; + } + } + else { // LINEAR + int x_low = floorf(x - 0.5); + int x_high = x_low + 1; + int y_low = floorf(y - 0.5); + int y_high = y_low + 1; + float w_x = x - x_low - 0.5; + float w_y = y - y_low - 0.5; + 
x_low = wrap_texcoord(x_low, W, wrap); + x_high = wrap_texcoord(x_high, W, wrap); + y_low = wrap_texcoord(y_low, H, wrap); + y_high = wrap_texcoord(y_high, H, wrap); + for (int c = 0; c < C; c++) { + color[c] = (1 - w_x) * (1 - w_y) * texture[y_low * W * C + x_low * C + c] + + w_x * (1 - w_y) * texture[y_low * W * C + x_high * C + c] + + (1 - w_x) * w_y * texture[y_high * W * C + x_low * C + c] + + w_x * w_y * texture[y_high * W * C + x_high * C + c]; + color[c] /= 255.0f; + } + } +} + + +static void sample_texture_mipmap( + const uint8_t* texture, + const int& H, const int& W, const int& C, + const std::vector>& mipmaps, + const float& u, const float& v, const float& mip_length, const float& mipLevelOffset, + const int& filter, const int& wrap, + float* color +) { + if (filter == 0) { // NEAREST + sample_texture(texture, H, W, C, u, v, filter, wrap, color); + } + else { // LINEAR + float mip_level = std::log2(mip_length * H) + mipLevelOffset; + if (!std::isfinite(mip_level) || mip_level <= 0 || mipmaps.empty()) { + sample_texture(texture, H, W, C, u, v, filter, wrap, color); + } + else if (mip_level >= mipmaps.size()) { + sample_texture(mipmaps[mipmaps.size() - 1].data(), H >> mipmaps.size(), W >> mipmaps.size(), C, u, v, filter, wrap, color); + } + else { + int lower_mip_level = std::floor(mip_level); + int upper_mip_level = lower_mip_level + 1; + float mip_frac = mip_level - lower_mip_level; + const uint8_t* lower_mip_ptr = lower_mip_level == 0 ? 
texture : mipmaps[lower_mip_level - 1].data(); + const uint8_t* upper_mip_ptr = mipmaps[upper_mip_level - 1].data(); + int lower_mip_H = H >> lower_mip_level; + int lower_mip_W = W >> lower_mip_level; + int upper_mip_H = H >> upper_mip_level; + int upper_mip_W = W >> upper_mip_level; + std::vector lower_mip_sample(C); + std::vector upper_mip_sample(C); + sample_texture(lower_mip_ptr, lower_mip_H, lower_mip_W, C, u, v, filter, wrap, lower_mip_sample.data()); + sample_texture(upper_mip_ptr, upper_mip_H, upper_mip_W, C, u, v, filter, wrap, upper_mip_sample.data()); + for (int c = 0; c < C; c++) { + color[c] = (1 - mip_frac) * lower_mip_sample[c] + mip_frac * upper_mip_sample[c]; + } + } + } +} + + +static std::tuple, std::vector, std::vector, std::vector, std::vector, std::vector, std::vector> +voxelize_trimesh_pbr_impl( + const float* voxel_size, + const int* grid_range, + const int N_tri, + const float* vertices, + const float* normals, + const float* uvs, + const int* materialIds, + const std::vector baseColorFactor, + const std::vector baseColorTexture, + const std::vector H_bcTex, const std::vector W_bcTex, + const std::vector baseColorTextureFilter, + const std::vector baseColorTextureWrap, + const std::vector metallicFactor, + const std::vector metallicTexture, + const std::vector H_mtlTex, const std::vector W_mtlTex, + const std::vector metallicTextureFilter, + const std::vector metallicTextureWrap, + const std::vector roughnessFactor, + const std::vector roughnessTexture, + const std::vector H_rghTex, const std::vector W_rghTex, + const std::vector roughnessTextureFilter, + const std::vector roughnessTextureWrap, + const std::vector emissiveFactor, + const std::vector emissiveTexture, + const std::vector H_emTex, const std::vector W_emTex, + const std::vector emissiveTextureFilter, + const std::vector emissiveTextureWrap, + const std::vector alphaMode, + const std::vector alphaCutoff, + const std::vector alphaFactor, + const std::vector alphaTexture, + const 
std::vector H_aTex, const std::vector W_aTex, + const std::vector alphaTextureFilter, + const std::vector alphaTextureWrap, + const std::vector normalTexture, + const std::vector H_nTex, const std::vector W_nTex, + const std::vector normalTextureFilter, + const std::vector normalTextureWrap, + const float mipLevelOffset, + const bool timing +) { + clock_t start, end; + + // Common variables used in the voxelization process + Eigen::Vector3f delta_p(voxel_size[0], voxel_size[1], voxel_size[2]); + Eigen::Vector3i grid_min(grid_range[0], grid_range[1], grid_range[2]); + Eigen::Vector3i grid_max(grid_range[3], grid_range[4], grid_range[5]); + + // Construct Mipmaps + start = clock(); + std::vector>> baseColorMipmaps(baseColorTexture.size()); + std::vector>> metallicMipmaps(metallicTexture.size()); + std::vector>> roughnessMipmaps(roughnessTexture.size()); + std::vector>> emissiveMipmaps(emissiveTexture.size()); + std::vector>> alphaMipmaps(alphaTexture.size()); + std::vector>> normalMipmaps(normalTexture.size()); + for (size_t i = 0; i < baseColorTexture.size(); i++) { + if (baseColorTexture[i] != nullptr && baseColorTextureFilter[i] != 0) { + baseColorMipmaps[i] = build_mipmaps(baseColorTexture[i], H_bcTex[i], W_bcTex[i], 3); + } + } + for (size_t i = 0; i < metallicTexture.size(); i++) { + if (metallicTexture[i] != nullptr && metallicTextureFilter[i] != 0) { + metallicMipmaps[i] = build_mipmaps(metallicTexture[i], H_mtlTex[i], W_mtlTex[i], 1); + } + } + for (size_t i = 0; i < roughnessTexture.size(); i++) { + if (roughnessTexture[i] != nullptr && roughnessTextureFilter[i] != 0) { + roughnessMipmaps[i] = build_mipmaps(roughnessTexture[i], H_rghTex[i], W_rghTex[i], 1); + } + } + for (size_t i = 0; i < emissiveTexture.size(); i++) { + if (emissiveTexture[i] != nullptr && emissiveTextureFilter[i] != 0) { + emissiveMipmaps[i] = build_mipmaps(emissiveTexture[i], H_emTex[i], W_emTex[i], 3); + } + } + for (size_t i = 0; i < alphaTexture.size(); i++) { + if (alphaTexture[i] 
!= nullptr && alphaTextureFilter[i] != 0) { + alphaMipmaps[i] = build_mipmaps(alphaTexture[i], H_aTex[i], W_aTex[i], 1); + } + } + for (size_t i = 0; i < normalTexture.size(); i++) { + if (normalTexture[i] != nullptr && normalTextureFilter[i] != 0) { + normalMipmaps[i] = build_mipmaps(normalTexture[i], H_nTex[i], W_nTex[i], 3); + } + } + end = clock(); + if (timing) std::cout << "Mipmaps construction took " << double(end - start) / CLOCKS_PER_SEC << " seconds." << std::endl; + + // Buffers + std::unordered_map hash_table; + std::vector coords; + std::vector buf_weights; + std::vector buf_baseColors; + std::vector buf_metallics; + std::vector buf_roughnesses; + std::vector buf_emissives; + std::vector buf_alphas; + std::vector buf_normals; + + // Enumerate all triangles + start = clock(); + for (size_t tid = 0; tid < N_tri; tid++) { + // COMPUTE COMMON TRIANGLE PROPERTIES + // Move vertices to origin using bbox + size_t ptr = tid * 9; + Eigen::Vector3f v0(vertices[ptr], vertices[ptr + 1], vertices[ptr + 2]); + Eigen::Vector3f v1(vertices[ptr + 3], vertices[ptr + 4], vertices[ptr + 5]); + Eigen::Vector3f v2(vertices[ptr + 6], vertices[ptr + 7], vertices[ptr + 8]); + // Normals + Eigen::Vector3f n0(normals[ptr], normals[ptr + 1], normals[ptr + 2]); + Eigen::Vector3f n1(normals[ptr + 3], normals[ptr + 4], normals[ptr + 5]); + Eigen::Vector3f n2(normals[ptr + 6], normals[ptr + 7], normals[ptr + 8]); + // UV vectors + ptr = tid * 6; + Eigen::Vector2f uv0(uvs[ptr], uvs[ptr + 1]); + Eigen::Vector2f uv1(uvs[ptr + 2], uvs[ptr + 3]); + Eigen::Vector2f uv2(uvs[ptr + 4], uvs[ptr + 5]); + // TBN + auto tbn = compute_TBN(v0, v1, v2, uv0, uv1, uv2); + Eigen::Vector3f t = std::get<0>(tbn); + Eigen::Vector3f b = std::get<1>(tbn); + Eigen::Vector3f n = std::get<2>(tbn); + Eigen::Vector2f v_mip_length = std::get<3>(tbn); + float mip_length = delta_p.maxCoeff() / std::sqrt(v_mip_length.x() * v_mip_length.y()); + // Material ID + int mid = materialIds[tid]; + + // Find intersected voxel 
for each triangle + std::unordered_set intersected_voxels; + // Scan-line algorithm to find intersections with the voxel grid from three directions + /* + t0 + | \ + | t1 + | / + t2 + */ + auto scan_line_fill = [&] (const int ax2) { + int ax0 = (ax2 + 1) % 3; + int ax1 = (ax2 + 2) % 3; + + // Canonical question + std::array t = { + Eigen::Vector3d(v0[ax0], v0[ax1], v0[ax2]), + Eigen::Vector3d(v1[ax0], v1[ax1], v1[ax2]), + Eigen::Vector3d(v2[ax0], v2[ax1], v2[ax2]) + }; + std::sort(t.begin(), t.end(), [](const Eigen::Vector3d& a, const Eigen::Vector3d& b) { return a.y() < b.y(); }); + + // Scan-line algorithm + int start = std::clamp(int(t[0].y() / voxel_size[ax1]), grid_min[ax1], grid_max[ax1] - 1); + int mid = std::clamp(int(t[1].y() / voxel_size[ax1]), grid_min[ax1], grid_max[ax1] - 1); + int end = std::clamp(int(t[2].y() / voxel_size[ax1]), grid_min[ax1], grid_max[ax1] - 1); + + auto scan_line_half = [&] (const int row_start, const int row_end, const Eigen::Vector3d t0, const Eigen::Vector3d t1, const Eigen::Vector3d t2) { + /* + t0 + | \ + t3-t4 + | \ + t1---t2 + */ + for (int y_idx = row_start; y_idx < row_end; ++y_idx) { + double y = (y_idx + 1) * voxel_size[ax1]; + Eigen::Vector2d t3 = lerp(t0.y(), t1.y(), y, Eigen::Vector2d(t0.x(), t0.z()), Eigen::Vector2d(t1.x(), t1.z())); + Eigen::Vector2d t4 = lerp(t0.y(), t2.y(), y, Eigen::Vector2d(t0.x(), t0.z()), Eigen::Vector2d(t2.x(), t2.z())); + if (t3.x() > t4.x()) std::swap(t3, t4); + int line_start = std::clamp(int(t3.x() / voxel_size[ax0]), grid_min[ax0], grid_max[ax0] - 1); + int line_end = std::clamp(int(t4.x() / voxel_size[ax0]), grid_min[ax0], grid_max[ax0] - 1); + for (int x_idx = line_start; x_idx < line_end; ++x_idx) { + double x = (x_idx + 1) * voxel_size[ax0]; + double z = lerp(t3.x(), t4.x(), x, t3.y(), t4.y()); + int z_idx = int(z / voxel_size[ax2]); + if (z_idx >= grid_min[ax2] && z_idx < grid_max[ax2]) { + // For 4-connected voxels + for (int dx = 0; dx < 2; ++dx) { + for (int dy = 0; dy < 2; ++dy) 
{ + VoxelCoord coord; + coord[ax0] = x_idx + dx; coord[ax1] = y_idx + dy; coord[ax2] = z_idx; + intersected_voxels.insert(coord); + } + } + } + } + } + }; + scan_line_half(start, mid, t[0], t[1], t[2]); + scan_line_half(mid, end, t[2], t[1], t[0]); + }; + scan_line_fill(0); + scan_line_fill(1); + scan_line_fill(2); + + // For all intersected voxels, sample texture and write to voxel grid + for (auto voxel : intersected_voxels) { + int x = voxel.x; + int y = voxel.y; + int z = voxel.z; + + // Compute barycentric coordinates of the voxel center and its weight (1 - signed distance from the triangle plane) + Eigen::Vector4f barycentric = project_onto_triangle( + Eigen::Vector3f((x + 0.5f) * delta_p.x(), (y + 0.5f) * delta_p.y(), (z + 0.5f) * delta_p.z()), + v0, v1, v2, n + ); + Eigen::Vector2f uv = { + barycentric.x() * uv0.x() + barycentric.y() * uv1.x() + barycentric.z() * uv2.x(), + barycentric.x() * uv0.y() + barycentric.y() * uv1.y() + barycentric.z() * uv2.y() + }; + Eigen::Vector3f int_n = { + barycentric.x() * n0.x() + barycentric.y() * n1.x() + barycentric.z() * n2.x(), + barycentric.x() * n0.y() + barycentric.y() * n1.y() + barycentric.z() * n2.y(), + barycentric.x() * n0.z() + barycentric.y() * n1.z() + barycentric.z() * n2.z() + }; + float weight = 1 - barycentric.w(); + + /// base color + float baseColor[3] = {1, 1, 1}; + if (baseColorTexture[mid]) { + sample_texture_mipmap( + baseColorTexture[mid], + H_bcTex[mid], W_bcTex[mid], 3, + baseColorMipmaps[mid], + uv.x(), uv.y(), mip_length, mipLevelOffset, + baseColorTextureFilter[mid], baseColorTextureWrap[mid], + baseColor + ); + } + baseColor[0] *= baseColorFactor[mid][0]; + baseColor[1] *= baseColorFactor[mid][1]; + baseColor[2] *= baseColorFactor[mid][2]; + + /// metallic + float metallic = 1.0f; + if (metallicTexture[mid]) { + sample_texture_mipmap( + metallicTexture[mid], + H_mtlTex[mid], W_mtlTex[mid], 1, + metallicMipmaps[mid], + uv.x(), uv.y(), mip_length, mipLevelOffset, + metallicTextureFilter[mid], metallicTextureWrap[mid], + &metallic + ); + } + metallic *= 
metallicFactor[mid]; + + /// roughness + float roughness = 1.0f; + if (roughnessTexture[mid]) { + sample_texture_mipmap( + roughnessTexture[mid], + H_rghTex[mid], W_rghTex[mid], 1, + roughnessMipmaps[mid], + uv.x(), uv.y(), mip_length, mipLevelOffset, + roughnessTextureFilter[mid], roughnessTextureWrap[mid], + &roughness + ); + } + roughness *= roughnessFactor[mid]; + + /// emissive + float emissive[3] = {1, 1, 1}; + if (emissiveTexture[mid]) { + sample_texture_mipmap( + emissiveTexture[mid], + H_emTex[mid], W_emTex[mid], 3, + // Must be the emissive texture's own 3-channel mip chain; the 1-channel roughness chain would be read out of bounds. + emissiveMipmaps[mid], + uv.x(), uv.y(), mip_length, mipLevelOffset, + emissiveTextureFilter[mid], emissiveTextureWrap[mid], + emissive + ); + } + emissive[0] *= emissiveFactor[mid][0]; + emissive[1] *= emissiveFactor[mid][1]; + emissive[2] *= emissiveFactor[mid][2]; + + /// alpha + float alpha = 1.0f; + if (alphaMode[mid] != 0) { + if (alphaTexture[mid]) { + sample_texture_mipmap( + alphaTexture[mid], + H_aTex[mid], W_aTex[mid], 1, + alphaMipmaps[mid], + uv.x(), uv.y(), mip_length, mipLevelOffset, + alphaTextureFilter[mid], alphaTextureWrap[mid], + &alpha + ); + } + alpha *= alphaFactor[mid]; + if (alphaMode[mid] == 1) { // MASK + alpha = alpha < alphaCutoff[mid] ? 
0.0f : 1.0f; + } + } + + /// normal + float normal[3] = {int_n.x(), int_n.y(), int_n.z()}; + if (normalTexture[mid]) { + sample_texture_mipmap( + normalTexture[mid], + H_nTex[mid], W_nTex[mid], 3, + normalMipmaps[mid], + uv.x(), uv.y(), mip_length, mipLevelOffset, + normalTextureFilter[mid], normalTextureWrap[mid], + normal + ); + normal[0] = normal[0] * 2 - 1; + normal[1] = normal[1] * 2 - 1; + normal[2] = normal[2] * 2 - 1; + Eigen::Vector3f _n = (normal[0] * t + normal[1] * b + normal[2] * int_n).normalized(); + normal[0] = _n.x(); + normal[1] = _n.y(); + normal[2] = _n.z(); + } + + // Write to voxel grid + auto coord = VoxelCoord{x-grid_min.x(), y-grid_min.y(), z-grid_min.z()}; + auto kv = hash_table.find(coord); + if (kv == hash_table.end()) { + hash_table[coord] = coords.size(); + coords.push_back({coord.x, coord.y, coord.z}); + buf_weights.push_back(weight); + buf_baseColors.push_back(Eigen::Vector3f(baseColor[0], baseColor[1], baseColor[2]) * weight); + buf_metallics.push_back(metallic * weight); + buf_roughnesses.push_back(roughness * weight); + buf_emissives.push_back(Eigen::Vector3f(emissive[0], emissive[1], emissive[2]) * weight); + buf_alphas.push_back(alpha * weight); + buf_normals.push_back(Eigen::Vector3f(normal[0], normal[1], normal[2]) * weight); + } + else { + auto i = kv->second; + buf_weights[i] += weight; + buf_baseColors[i] += Eigen::Vector3f(baseColor[0], baseColor[1], baseColor[2]) * weight; + buf_metallics[i] += metallic * weight; + buf_roughnesses[i] += roughness * weight; + buf_emissives[i] += Eigen::Vector3f(emissive[0], emissive[1], emissive[2]) * weight; + buf_alphas[i] += alpha * weight; + buf_normals[i] += Eigen::Vector3f(normal[0], normal[1], normal[2]) * weight; + } + } + } + end = clock(); + if (timing) std::cout << "Voxelization took " << double(end - start) / CLOCKS_PER_SEC << " seconds." 
<< std::endl; + + // Normalize buffers + start = clock(); + std::vector out_coord(coords.size() * 3); + std::vector out_baseColor(coords.size() * 3); + std::vector out_metallic(coords.size()); + std::vector out_roughness(coords.size()); + std::vector out_emissive(coords.size() * 3); + std::vector out_alpha(coords.size()); + std::vector out_normal(coords.size() * 3); + for (int i = 0; i < coords.size(); i++) { + out_coord[i * 3 + 0] = coords[i].x; + out_coord[i * 3 + 1] = coords[i].y; + out_coord[i * 3 + 2] = coords[i].z; + out_baseColor[i * 3 + 0] = buf_baseColors[i].x() / buf_weights[i]; + out_baseColor[i * 3 + 1] = buf_baseColors[i].y() / buf_weights[i]; + out_baseColor[i * 3 + 2] = buf_baseColors[i].z() / buf_weights[i]; + out_metallic[i] = buf_metallics[i] / buf_weights[i]; + out_roughness[i] = buf_roughnesses[i] / buf_weights[i]; + out_emissive[i * 3 + 0] = buf_emissives[i].x() / buf_weights[i]; + out_emissive[i * 3 + 1] = buf_emissives[i].y() / buf_weights[i]; + out_emissive[i * 3 + 2] = buf_emissives[i].z() / buf_weights[i]; + out_alpha[i] = buf_alphas[i] / buf_weights[i]; + out_normal[i * 3 + 0] = buf_normals[i].x() / buf_weights[i]; + out_normal[i * 3 + 1] = buf_normals[i].y() / buf_weights[i]; + out_normal[i * 3 + 2] = buf_normals[i].z() / buf_weights[i]; + } + end = clock(); + if (timing) std::cout << "Normalization took " << double(end - start) / CLOCKS_PER_SEC << " seconds." 
<< std::endl; + + return std::make_tuple( + std::move(out_coord), + std::move(out_baseColor), + std::move(out_metallic), + std::move(out_roughness), + std::move(out_emissive), + std::move(out_alpha), + std::move(out_normal) + ); +} + + +std::tuple +textured_mesh_to_volumetric_attr_cpu( + const torch::Tensor& voxel_size, + const torch::Tensor& grid_range, + const torch::Tensor& vertices, + const torch::Tensor& normals, + const torch::Tensor& uvs, + const torch::Tensor& materialIds, + const std::vector& baseColorFactor, + const std::vector& baseColorTexture, + const std::vector& baseColorTextureFilter, + const std::vector& baseColorTextureWrap, + const std::vector& metallicFactor, + const std::vector& metallicTexture, + const std::vector& metallicTextureFilter, + const std::vector& metallicTextureWrap, + const std::vector& roughnessFactor, + const std::vector& roughnessTexture, + const std::vector& roughnessTextureFilter, + const std::vector& roughnessTextureWrap, + const std::vector& emissiveFactor, + const std::vector& emissiveTexture, + const std::vector& emissiveTextureFilter, + const std::vector& emissiveTextureWrap, + const std::vector& alphaMode, + const std::vector& alphaCutoff, + const std::vector& alphaFactor, + const std::vector& alphaTexture, + const std::vector& alphaTextureFilter, + const std::vector& alphaTextureWrap, + const std::vector& normalTexture, + const std::vector& normalTextureFilter, + const std::vector& normalTextureWrap, + const float mipLevelOffset, + const bool timing +) { + auto N_mat = baseColorFactor.size(); + int N_tri = vertices.size(0); + + // Get the size of the input tensors + std::vector baseColorFactor_ptrs(N_mat); + std::vector baseColorTexture_ptrs(N_mat); + std::vector H_bcTex(N_mat), W_bcTex(N_mat); + std::vector metallicFactor_vec(N_mat); + std::vector metallicTexture_ptrs(N_mat); + std::vector H_mtlTex(N_mat), W_mtlTex(N_mat); + std::vector roughnessFactor_vec(N_mat); + std::vector roughnessTexture_ptrs(N_mat); + 
std::vector H_rghTex(N_mat), W_rghTex(N_mat); + std::vector emissiveFactor_ptrs(N_mat); + std::vector emissiveTexture_ptrs(N_mat); + std::vector H_emTex(N_mat), W_emTex(N_mat); + std::vector alphaMode_vec(N_mat); + std::vector alphaCutoff_vec(N_mat); + std::vector alphaFactor_vec(N_mat); + std::vector alphaTexture_ptrs(N_mat); + std::vector H_aTex(N_mat), W_aTex(N_mat); + std::vector normalTexture_ptrs(N_mat); + std::vector H_nTex(N_mat), W_nTex(N_mat); + + for (int i = 0; i < N_mat; ++i) { + baseColorFactor_ptrs[i] = baseColorFactor[i].contiguous().data_ptr(); + if (baseColorTexture[i].numel() > 0) { + baseColorTexture_ptrs[i] = baseColorTexture[i].contiguous().data_ptr(); + H_bcTex[i] = baseColorTexture[i].size(0); + W_bcTex[i] = baseColorTexture[i].size(1); + } + else { + baseColorTexture_ptrs[i] = nullptr; + H_bcTex[i] = 0; + W_bcTex[i] = 0; + } + metallicFactor_vec[i] = metallicFactor[i]; + if (metallicTexture[i].numel() > 0) { + metallicTexture_ptrs[i] = metallicTexture[i].contiguous().data_ptr(); + H_mtlTex[i] = metallicTexture[i].size(0); + W_mtlTex[i] = metallicTexture[i].size(1); + } + else { + metallicTexture_ptrs[i] = nullptr; + H_mtlTex[i] = 0; + W_mtlTex[i] = 0; + } + roughnessFactor_vec[i] = roughnessFactor[i]; + if (roughnessTexture[i].numel() > 0) { + roughnessTexture_ptrs[i] = roughnessTexture[i].contiguous().data_ptr(); + H_rghTex[i] = roughnessTexture[i].size(0); + W_rghTex[i] = roughnessTexture[i].size(1); + } + else { + roughnessTexture_ptrs[i] = nullptr; + H_rghTex[i] = 0; + W_rghTex[i] = 0; + } + emissiveFactor_ptrs[i] = emissiveFactor[i].contiguous().data_ptr(); + if (emissiveTexture[i].numel() > 0) { + emissiveTexture_ptrs[i] = emissiveTexture[i].contiguous().data_ptr(); + H_emTex[i] = emissiveTexture[i].size(0); + W_emTex[i] = emissiveTexture[i].size(1); + } + else { + emissiveTexture_ptrs[i] = nullptr; + H_emTex[i] = 0; + W_emTex[i] = 0; + } + alphaMode_vec[i] = alphaMode[i]; + alphaCutoff_vec[i] = alphaCutoff[i]; + alphaFactor_vec[i] = 
alphaFactor[i]; + if (alphaTexture[i].numel() > 0) { + alphaTexture_ptrs[i] = alphaTexture[i].contiguous().data_ptr(); + H_aTex[i] = alphaTexture[i].size(0); + W_aTex[i] = alphaTexture[i].size(1); + } + else { + alphaTexture_ptrs[i] = nullptr; + H_aTex[i] = 0; + W_aTex[i] = 0; + } + if (normalTexture[i].numel() > 0) { + normalTexture_ptrs[i] = normalTexture[i].contiguous().data_ptr(); + H_nTex[i] = normalTexture[i].size(0); + W_nTex[i] = normalTexture[i].size(1); + } + else { + normalTexture_ptrs[i] = nullptr; + H_nTex[i] = 0; + W_nTex[i] = 0; + } + } + + auto outputs = voxelize_trimesh_pbr_impl( + voxel_size.contiguous().data_ptr(), + grid_range.contiguous().data_ptr(), + N_tri, + vertices.contiguous().data_ptr(), + normals.contiguous().data_ptr(), + uvs.contiguous().data_ptr(), + materialIds.contiguous().data_ptr(), + baseColorFactor_ptrs, + baseColorTexture_ptrs, + H_bcTex, W_bcTex, + baseColorTextureFilter, baseColorTextureWrap, + metallicFactor_vec, + metallicTexture_ptrs, + H_mtlTex, W_mtlTex, + metallicTextureFilter, metallicTextureWrap, + roughnessFactor_vec, + roughnessTexture_ptrs, + H_rghTex, W_rghTex, + roughnessTextureFilter, roughnessTextureWrap, + emissiveFactor_ptrs, + emissiveTexture_ptrs, + H_emTex, W_emTex, + emissiveTextureFilter, emissiveTextureWrap, + alphaMode_vec, + alphaCutoff_vec, + alphaFactor_vec, + alphaTexture_ptrs, + H_aTex, W_aTex, + alphaTextureFilter, alphaTextureWrap, + normalTexture_ptrs, + H_nTex, W_nTex, + normalTextureFilter, normalTextureWrap, + mipLevelOffset, + timing + ); + + std::vector coords_vec = std::get<0>(outputs); + std::vector baseColors_vec = std::get<1>(outputs); + std::vector metallics_vec = std::get<2>(outputs); + std::vector roughnesses_vec = std::get<3>(outputs); + std::vector emissives_vec = std::get<4>(outputs); + std::vector alphas_vec = std::get<5>(outputs); + std::vector normals_vec = std::get<6>(outputs); + + // Create output tensors + auto out_coords = torch::from_blob(coords_vec.data(), 
{static_cast(coords_vec.size() / 3), 3}, torch::kInt32).clone(); + auto out_baseColors = torch::from_blob(baseColors_vec.data(), {static_cast(baseColors_vec.size() / 3), 3}, torch::kFloat32).clone(); + auto out_metallics = torch::from_blob(metallics_vec.data(), {static_cast(metallics_vec.size())}, torch::kFloat32).clone(); + auto out_roughnesses = torch::from_blob(roughnesses_vec.data(), {static_cast(roughnesses_vec.size())}, torch::kFloat32).clone(); + auto out_emissives = torch::from_blob(emissives_vec.data(), {static_cast(emissives_vec.size() / 3), 3}, torch::kFloat32).clone(); + auto out_alphas = torch::from_blob(alphas_vec.data(), {static_cast(alphas_vec.size())}, torch::kFloat32).clone(); + auto out_normals = torch::from_blob(normals_vec.data(), {static_cast(normals_vec.size() / 3), 3}, torch::kFloat32).clone(); + + return std::make_tuple( + out_coords, + out_baseColors, + out_metallics, + out_roughnesses, + out_emissives, + out_alphas, + out_normals + ); +} + diff --git a/o-voxel/src/ext.cpp b/o-voxel/src/ext.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d95d3e826f8ffcbb8f366e76f61176911f3be3d4 --- /dev/null +++ b/o-voxel/src/ext.cpp @@ -0,0 +1,37 @@ +#include +#include "hash/api.h" +#include "convert/api.h" +#include "io/api.h" +#include "serialize/api.h" +#include "rasterize/api.h" + + +PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { + // Hash functions + m.def("hashmap_insert_cuda", &hashmap_insert_cuda); + m.def("hashmap_lookup_cuda", &hashmap_lookup_cuda); + m.def("hashmap_insert_3d_cuda", &hashmap_insert_3d_cuda); + m.def("hashmap_lookup_3d_cuda", &hashmap_lookup_3d_cuda); + m.def("hashmap_insert_3d_idx_as_val_cuda", &hashmap_insert_3d_idx_as_val_cuda); + // Convert functions + m.def("mesh_to_flexible_dual_grid_cpu", &mesh_to_flexible_dual_grid_cpu, py::call_guard()); + m.def("textured_mesh_to_volumetric_attr_cpu", &textured_mesh_to_volumetric_attr_cpu, py::call_guard()); + // Serialization functions + m.def("z_order_encode_cpu", 
&z_order_encode_cpu, py::call_guard()); + m.def("z_order_decode_cpu", &z_order_decode_cpu, py::call_guard()); + m.def("hilbert_encode_cpu", &hilbert_encode_cpu, py::call_guard()); + m.def("hilbert_decode_cpu", &hilbert_decode_cpu, py::call_guard()); + m.def("z_order_encode_cuda", &z_order_encode_cuda, py::call_guard()); + m.def("z_order_decode_cuda", &z_order_decode_cuda, py::call_guard()); + m.def("hilbert_encode_cuda", &hilbert_encode_cuda, py::call_guard()); + m.def("hilbert_decode_cuda", &hilbert_decode_cuda, py::call_guard()); + // IO functions + m.def("encode_sparse_voxel_octree_cpu", &encode_sparse_voxel_octree_cpu, py::call_guard()); + m.def("decode_sparse_voxel_octree_cpu", &decode_sparse_voxel_octree_cpu, py::call_guard()); + m.def("encode_sparse_voxel_octree_attr_parent_cpu", &encode_sparse_voxel_octree_attr_parent_cpu, py::call_guard()); + m.def("decode_sparse_voxel_octree_attr_parent_cpu", &decode_sparse_voxel_octree_attr_parent_cpu, py::call_guard()); + m.def("encode_sparse_voxel_octree_attr_neighbor_cpu", &encode_sparse_voxel_octree_attr_neighbor_cpu, py::call_guard()); + m.def("decode_sparse_voxel_octree_attr_neighbor_cpu", &decode_sparse_voxel_octree_attr_neighbor_cpu, py::call_guard()); + // Rasterization functions + m.def("rasterize_voxels_cuda", &rasterize_voxels_cuda); +} \ No newline at end of file diff --git a/o-voxel/src/hash/api.h b/o-voxel/src/hash/api.h new file mode 100644 index 0000000000000000000000000000000000000000..521f96f9aec2eb6d13a27ee4e6a2040ca90bbc44 --- /dev/null +++ b/o-voxel/src/hash/api.h @@ -0,0 +1,111 @@ +/* + * Hashmap + * + * Copyright (C) 2025, Jianfeng XIANG + * All rights reserved. 
+ * + * Licensed under The MIT License [see LICENSE for details] + * + * Written by Jianfeng XIANG + */ + +#pragma once +#include + + +#define BLOCK_SIZE 256 + + +/** + * Insert keys into the hashmap + * + * @param hashmap_keys [N] uint32/uint64 tensor containing the hashmap keys + * @param hashmap_values [N] uint32/uint64 tensor containing the hashmap values + * @param keys [M] uint32/uint64 tensor containing the keys to be inserted + * @param values [M] uint32/uint64 tensor containing the values to be inserted + */ +void hashmap_insert_cuda( + torch::Tensor& hashmap_keys, + torch::Tensor& hashmap_values, + const torch::Tensor& keys, + const torch::Tensor& values +); + + +/** + * Lookup keys in the hashmap + * + * @param hashmap_keys [N] uint32/uint64 tensor containing the hashmap keys + * @param hashmap_values [N] uint32/uint64 tensor containing the hashmap values + * @param keys [M] uint32/uint64 tensor containing the keys to be looked up + * @return [M] uint32/uint64 tensor containing the values of the keys + */ +torch::Tensor hashmap_lookup_cuda( + const torch::Tensor& hashmap_keys, + const torch::Tensor& hashmap_values, + const torch::Tensor& keys +); + + +/** + * Insert 3D coordinates into the hashmap + * + * @param hashmap_keys [N] uint32/uint64 tensor containing the hashmap keys + * @param hashmap_values [N] uint32/uint64 tensor containing the hashmap values + * @param coords [M, 4] int32 tensor containing the keys to be inserted + * @param values [M] uint32/uint64 tensor containing the values to be inserted + * @param W the number of width dimensions + * @param H the number of height dimensions + * @param D the number of depth dimensions + */ +void hashmap_insert_3d_cuda( + torch::Tensor& hashmap_keys, + torch::Tensor& hashmap_values, + const torch::Tensor& coords, + const torch::Tensor& values, + int W, + int H, + int D +); + + +/** + * Lookup 3D coordinates in the hashmap + * + * @param hashmap_keys [N] uint32/uint64 tensor containing the hashmap keys + 
* @param hashmap_values [N] uint32/uint64 tensor containing the hashmap values + * @param coords [M, 4] int32 tensor containing the keys to be looked up + * @param W the number of width dimensions + * @param H the number of height dimensions + * @param D the number of depth dimensions + * + * @return [M] uint32/uint64 tensor containing the values of the keys + */ +torch::Tensor hashmap_lookup_3d_cuda( + const torch::Tensor& hashmap_keys, + const torch::Tensor& hashmap_values, + const torch::Tensor& coords, + int W, + int H, + int D +); + + +/** + * Insert 3D coordinates into the hashmap using index as value + * + * @param hashmap_keys [N] uint32/uint64 tensor containing the hashmap keys + * @param hashmap_values [N] uint32/uint64 tensor containing the hashmap values + * @param coords [M, 4] int32 tensor containing the keys to be inserted + * @param W the number of width dimensions + * @param H the number of height dimensions + * @param D the number of depth dimensions + */ +void hashmap_insert_3d_idx_as_val_cuda( + torch::Tensor& hashmap_keys, + torch::Tensor& hashmap_values, + const torch::Tensor& coords, + int W, + int H, + int D +); diff --git a/o-voxel/src/hash/hash.cu b/o-voxel/src/hash/hash.cu new file mode 100644 index 0000000000000000000000000000000000000000..4163b0a8037d51384e312eb1276a3a1176511d89 --- /dev/null +++ b/o-voxel/src/hash/hash.cu @@ -0,0 +1,446 @@ +#include +#include +#include + +#include "api.h" +#include "hash.cuh" + + +template +static __global__ void hashmap_insert_cuda_kernel( + const size_t N, + const size_t M, + K* __restrict__ hashmap_keys, + V* __restrict__ hashmap_values, + const K* __restrict__ keys, + const V* __restrict__ values +) { + size_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_id < M) + { + K key = keys[thread_id]; + V value = values[thread_id]; + linear_probing_insert(hashmap_keys, hashmap_values, key, value, N); + } +} + + +template +static void dispatch_hashmap_insert_cuda( + torch::Tensor& 
hashmap_keys, + torch::Tensor& hashmap_values, + const torch::Tensor& keys, + const torch::Tensor& values +) { + hashmap_insert_cuda_kernel<<< + (keys.size(0) + BLOCK_SIZE - 1) / BLOCK_SIZE, + BLOCK_SIZE + >>>( + hashmap_keys.size(0), + keys.size(0), + hashmap_keys.data_ptr(), + hashmap_values.data_ptr(), + keys.data_ptr(), + values.data_ptr() + ); +} + + +/** + * Insert keys into the hashmap + * + * @param hashmap_keys [N] uint32/uint64 tensor containing the hashmap keys + * @param hashmap_values [N] uint32/uint64 tensor containing the hashmap values + * @param keys [M] uint32/uint64 tensor containing the keys to be inserted + * @param values [M] uint32/uint64 tensor containing the values to be inserted + */ +void hashmap_insert_cuda( + torch::Tensor& hashmap_keys, + torch::Tensor& hashmap_values, + const torch::Tensor& keys, + const torch::Tensor& values +) { + // Dispatch to 32-bit or 64-bit kernel + if (hashmap_keys.dtype() == torch::kUInt32 && hashmap_values.dtype() == torch::kUInt32) { + TORCH_CHECK(keys.dtype() == torch::kUInt32, "Keys must be uint32"); + TORCH_CHECK(values.dtype() == torch::kUInt32, "Values must be uint32"); + dispatch_hashmap_insert_cuda(hashmap_keys, hashmap_values, keys, values); + } + else if (hashmap_keys.dtype() == torch::kUInt32 && hashmap_values.dtype() == torch::kUInt64) { + TORCH_CHECK(keys.dtype() == torch::kUInt32, "Keys must be uint32"); + TORCH_CHECK(values.dtype() == torch::kUInt64, "Values must be uint64"); + dispatch_hashmap_insert_cuda(hashmap_keys, hashmap_values, keys, values); + } + else if (hashmap_keys.dtype() == torch::kUInt64 && hashmap_values.dtype() == torch::kUInt32) { + TORCH_CHECK(keys.dtype() == torch::kUInt64, "Keys must be uint64"); + TORCH_CHECK(values.dtype() == torch::kUInt32, "Values must be uint32"); + dispatch_hashmap_insert_cuda(hashmap_keys, hashmap_values, keys, values); + } + else if (hashmap_keys.dtype() == torch::kUInt64 && hashmap_values.dtype() == torch::kUInt64) { + TORCH_CHECK(keys.dtype() == 
torch::kUInt64, "Keys must be uint64"); + TORCH_CHECK(values.dtype() == torch::kUInt64, "Values must be uint64"); + dispatch_hashmap_insert_cuda(hashmap_keys, hashmap_values, keys, values); + } + else { + TORCH_CHECK(false, "Unsupported data type"); + } +} + + +template +static __global__ void hashmap_lookup_cuda_kernel( + const size_t N, + const size_t M, + const K * __restrict__ hashmap_keys, + const V * __restrict__ hashmap_values, + const K * __restrict__ keys, + V * __restrict__ values +) { + size_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_id < M) { + K key = keys[thread_id]; + values[thread_id] = linear_probing_lookup(hashmap_keys, hashmap_values, key, N); + } +} + + +template +static void dispatch_hashmap_lookup_cuda( + const torch::Tensor& hashmap_keys, + const torch::Tensor& hashmap_values, + const torch::Tensor& keys, + torch::Tensor& values +) { + hashmap_lookup_cuda_kernel<<< + (keys.size(0) + BLOCK_SIZE - 1) / BLOCK_SIZE, + BLOCK_SIZE + >>>( + hashmap_keys.size(0), + keys.size(0), + hashmap_keys.data_ptr(), + hashmap_values.data_ptr(), + keys.data_ptr(), + values.data_ptr() + ); +} + + +/** + * Lookup keys in the hashmap + * + * @param hashmap_keys [N] uint32/uint64 tensor containing the hashmap keys + * @param hashmap_values [N] uint32/uint64 tensor containing the hashmap values + * @param keys [M] uint32/uint64 tensor containing the keys to be looked up + * @return [M] uint32/uint64 tensor containing the values of the keys + */ +torch::Tensor hashmap_lookup_cuda( + const torch::Tensor& hashmap_keys, + const torch::Tensor& hashmap_values, + const torch::Tensor& keys +) { + // Allocate output tensor + auto output = torch::empty({keys.size(0)}, torch::dtype(hashmap_values.dtype()).device(hashmap_values.device())); + + // Dispatch to 32-bit or 64-bit kernel + if (hashmap_keys.dtype() == torch::kUInt32 && hashmap_values.dtype() == torch::kUInt32) { + TORCH_CHECK(keys.dtype() == torch::kUInt32, "Keys must be uint32"); + 
TORCH_CHECK(output.dtype() == torch::kUInt32, "Output must be uint32"); + dispatch_hashmap_lookup_cuda(hashmap_keys, hashmap_values, keys, output); + } + else if (hashmap_keys.dtype() == torch::kUInt32 && hashmap_values.dtype() == torch::kUInt64) { + TORCH_CHECK(keys.dtype() == torch::kUInt32, "Keys must be uint32"); + TORCH_CHECK(output.dtype() == torch::kUInt64, "Output must be uint64"); + dispatch_hashmap_lookup_cuda(hashmap_keys, hashmap_values, keys, output); + } + else if (hashmap_keys.dtype() == torch::kUInt64 && hashmap_values.dtype() == torch::kUInt32) { + TORCH_CHECK(keys.dtype() == torch::kUInt64, "Keys must be uint64"); + TORCH_CHECK(output.dtype() == torch::kUInt32, "Output must be uint32"); + dispatch_hashmap_lookup_cuda(hashmap_keys, hashmap_values, keys, output); + } + else if (hashmap_keys.dtype() == torch::kUInt64 && hashmap_values.dtype() == torch::kUInt64) { + TORCH_CHECK(keys.dtype() == torch::kUInt64, "Keys must be uint64"); + TORCH_CHECK(output.dtype() == torch::kUInt64, "Output must be uint64"); + dispatch_hashmap_lookup_cuda(hashmap_keys, hashmap_values, keys, output); + } + else { + TORCH_CHECK(false, "Unsupported data type"); + } + + return output; +} + + +template +static __global__ void hashmap_insert_3d_cuda_kernel( + const size_t N, + const size_t M, + const int W, + const int H, + const int D, + K* __restrict__ hashmap_keys, + V* __restrict__ hashmap_values, + const int32_t* __restrict__ coords, + const V* __restrict__ values +) { + size_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_id < M) { + int4 coord = reinterpret_cast(coords)[thread_id]; + int b = coord.x; + int x = coord.y; + int y = coord.z; + int z = coord.w; + size_t flat_idx = (size_t)b * W * H * D + (size_t)x * H * D + (size_t)y * D + z; + K key = static_cast(flat_idx); + V value = values[thread_id]; + linear_probing_insert(hashmap_keys, hashmap_values, key, value, N); + } +} + + +template +static void dispatch_hashmap_insert_3d_cuda( + torch::Tensor& 
hashmap_keys, + torch::Tensor& hashmap_values, + const torch::Tensor& coords, + const torch::Tensor& values, + int W, int H, int D +) { + hashmap_insert_3d_cuda_kernel<<< + (coords.size(0) + BLOCK_SIZE - 1) / BLOCK_SIZE, + BLOCK_SIZE + >>>( + hashmap_keys.size(0), + coords.size(0), + W, H, D, + hashmap_keys.data_ptr(), + hashmap_values.data_ptr(), + coords.data_ptr(), + values.data_ptr() + ); +} + + +/** + * Insert 3D coordinates into the hashmap + * + * @param hashmap_keys [N] uint32/uint64 tensor containing the hashmap keys + * @param hashmap_values [N] uint32/uint64 tensor containing the hashmap values + * @param coords [M, 4] int32 tensor containing the keys to be inserted + * @param values [M] uint32/uint64 tensor containing the values to be inserted + * @param W the number of width dimensions + * @param H the number of height dimensions + * @param D the number of depth dimensions + */ +void hashmap_insert_3d_cuda( + torch::Tensor& hashmap_keys, + torch::Tensor& hashmap_values, + const torch::Tensor& coords, + const torch::Tensor& values, + int W, + int H, + int D +) { + TORCH_CHECK(coords.dtype() == torch::kInt32, "Coords must be int32"); + + // Dispatch to 32-bit or 64-bit kernel + if (hashmap_keys.dtype() == torch::kUInt32 && hashmap_values.dtype() == torch::kUInt32) { + TORCH_CHECK(values.dtype() == torch::kUInt32, "Values must be uint32"); + dispatch_hashmap_insert_3d_cuda(hashmap_keys, hashmap_values, coords, values, W, H, D); + } + else if (hashmap_keys.dtype() == torch::kUInt32 && hashmap_values.dtype() == torch::kUInt64) { + TORCH_CHECK(values.dtype() == torch::kUInt64, "Values must be uint64"); + dispatch_hashmap_insert_3d_cuda(hashmap_keys, hashmap_values, coords, values, W, H, D); + } + else if (hashmap_keys.dtype() == torch::kUInt64 && hashmap_values.dtype() == torch::kUInt32) { + TORCH_CHECK(values.dtype() == torch::kUInt32, "Values must be uint32"); + dispatch_hashmap_insert_3d_cuda(hashmap_keys, hashmap_values, coords, values, W, H, D); + } + 
else if (hashmap_keys.dtype() == torch::kUInt64 && hashmap_values.dtype() == torch::kUInt64) { + TORCH_CHECK(values.dtype() == torch::kUInt64, "Values must be uint64"); + dispatch_hashmap_insert_3d_cuda(hashmap_keys, hashmap_values, coords, values, W, H, D); + } + else { + TORCH_CHECK(false, "Unsupported data type"); + } +} + + +template +static __global__ void hashmap_lookup_3d_cuda_kernel( + const size_t N, + const size_t M, + const int W, + const int H, + const int D, + const K* __restrict__ hashmap_keys, + const V* __restrict__ hashmap_values, + const int32_t* __restrict__ coords, + V* __restrict__ values +) { + const size_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_id < M) { + int4 coord = reinterpret_cast(coords)[thread_id]; + int b = coord.x; + int x = coord.y; + int y = coord.z; + int z = coord.w; + if (x < 0 || x >= W || y < 0 || y >= H || z < 0 || z >= D) { + values[thread_id] = std::numeric_limits::max(); + return; + } + size_t flat_idx = (size_t)b * W * H * D + (size_t)x * H * D + (size_t)y * D + z; + K key = static_cast(flat_idx); + values[thread_id] = linear_probing_lookup(hashmap_keys, hashmap_values, key, N); + } +} + + +template +static void dispatch_hashmap_lookup_3d_cuda( + const torch::Tensor& hashmap_keys, + const torch::Tensor& hashmap_values, + const torch::Tensor& coords, + torch::Tensor& values, + int W, int H, int D +) { + hashmap_lookup_3d_cuda_kernel<<< + (coords.size(0) + BLOCK_SIZE - 1) / BLOCK_SIZE, + BLOCK_SIZE + >>>( + hashmap_keys.size(0), + coords.size(0), + W, H, D, + hashmap_keys.data_ptr(), + hashmap_values.data_ptr(), + coords.data_ptr(), + values.data_ptr() + ); +} + + +/** + * Lookup 3D coordinates in the hashmap + * + * @param hashmap_keys [N] uint32/uint64 tensor containing the hashmap keys + * @param hashmap_values [N] uint32/uint64 tensor containing the hashmap values + * @param coords [M, 4] int32 tensor containing the keys to be looked up + * @param W the number of width dimensions + * @param H 
the number of height dimensions + * @param D the number of depth dimensions + * + * @return [M] uint32/uint64 tensor containing the values of the keys + */ +torch::Tensor hashmap_lookup_3d_cuda( + const torch::Tensor& hashmap_keys, + const torch::Tensor& hashmap_values, + const torch::Tensor& coords, + int W, + int H, + int D +) { + // Allocate output tensor + auto output = torch::empty({coords.size(0)}, torch::dtype(hashmap_values.dtype()).device(hashmap_values.device())); + + // Dispatch to 32-bit or 64-bit kernel + if (hashmap_keys.dtype() == torch::kUInt32 && hashmap_values.dtype() == torch::kUInt32) { + dispatch_hashmap_lookup_3d_cuda(hashmap_keys, hashmap_values, coords, output, W, H, D); + } + else if (hashmap_keys.dtype() == torch::kUInt32 && hashmap_values.dtype() == torch::kUInt64) { + dispatch_hashmap_lookup_3d_cuda(hashmap_keys, hashmap_values, coords, output, W, H, D); + } + else if (hashmap_keys.dtype() == torch::kUInt64 && hashmap_values.dtype() == torch::kUInt32) { + dispatch_hashmap_lookup_3d_cuda(hashmap_keys, hashmap_values, coords, output, W, H, D); + } + else if (hashmap_keys.dtype() == torch::kUInt64 && hashmap_values.dtype() == torch::kUInt64) { + dispatch_hashmap_lookup_3d_cuda(hashmap_keys, hashmap_values, coords, output, W, H, D); + } + else { + TORCH_CHECK(false, "Unsupported data type"); + } + + return output; +} + + +template +static __global__ void hashmap_insert_3d_idx_as_val_cuda_kernel( + const size_t N, + const size_t M, + const int W, + const int H, + const int D, + K* __restrict__ hashmap, + V* __restrict__ values, + const int32_t* __restrict__ coords +) { + const size_t thread_id = blockIdx.x * blockDim.x + threadIdx.x; + if (thread_id < M) { + int4 coord = reinterpret_cast(coords)[thread_id]; + int b = coord.x; + int x = coord.y; + int y = coord.z; + int z = coord.w; + size_t flat_idx = (size_t)b * W * H * D + (size_t)x * H * D + (size_t)y * D + z; + K key = static_cast(flat_idx); + V value = static_cast(thread_id); + 
linear_probing_insert(hashmap, values, key, value, N); + } +} + + +template +static void dispatch_hashmap_insert_3d_idx_as_val_cuda( + torch::Tensor& hashmap_keys, + torch::Tensor& hashmap_values, + const torch::Tensor& coords, + int W, int H, int D +) { + hashmap_insert_3d_idx_as_val_cuda_kernel<<< + (coords.size(0) + BLOCK_SIZE - 1) / BLOCK_SIZE, + BLOCK_SIZE + >>>( + hashmap_keys.size(0), + coords.size(0), + W, H, D, + hashmap_keys.data_ptr(), + hashmap_values.data_ptr(), + coords.data_ptr() + ); +} + + +/** + * Insert 3D coordinates into the hashmap using index as value + * + * @param hashmap_keys [N] uint32/uint64 tensor containing the hashmap keys + * @param hashmap_values [N] uint32/uint64 tensor containing the hashmap values + * @param coords [M, 4] int32 tensor containing the keys to be inserted + * @param W the number of width dimensions + * @param H the number of height dimensions + * @param D the number of depth dimensions + */ +void hashmap_insert_3d_idx_as_val_cuda( + torch::Tensor& hashmap_keys, + torch::Tensor& hashmap_values, + const torch::Tensor& coords, + int W, + int H, + int D +) { + // Dispatch to 32-bit or 64-bit kernel + if (hashmap_keys.dtype() == torch::kUInt32 && hashmap_values.dtype() == torch::kUInt32) { + dispatch_hashmap_insert_3d_idx_as_val_cuda(hashmap_keys, hashmap_values, coords, W, H, D); + } + else if (hashmap_keys.dtype() == torch::kUInt32 && hashmap_values.dtype() == torch::kUInt64) { + dispatch_hashmap_insert_3d_idx_as_val_cuda(hashmap_keys, hashmap_values, coords, W, H, D); + } + else if (hashmap_keys.dtype() == torch::kUInt64 && hashmap_values.dtype() == torch::kUInt32) { + dispatch_hashmap_insert_3d_idx_as_val_cuda(hashmap_keys, hashmap_values, coords, W, H, D); + } + else if (hashmap_keys.dtype() == torch::kUInt64 && hashmap_values.dtype() == torch::kUInt64) { + dispatch_hashmap_insert_3d_idx_as_val_cuda(hashmap_keys, hashmap_values, coords, W, H, D); + } + else { + TORCH_CHECK(false, "Unsupported data type"); + } +} \ 
No newline at end of file diff --git a/o-voxel/src/hash/hash.cuh b/o-voxel/src/hash/hash.cuh new file mode 100644 index 0000000000000000000000000000000000000000..302ad5b836aae22f982d84384a27b64dd0dcc9a1 --- /dev/null +++ b/o-voxel/src/hash/hash.cuh @@ -0,0 +1,87 @@ +// 32 bit Murmur3 hash +__forceinline__ __device__ size_t hash(uint32_t k, size_t N) { + k ^= k >> 16; + k *= 0x85ebca6b; + k ^= k >> 13; + k *= 0xc2b2ae35; + k ^= k >> 16; + return k % N; +} + + +// 64 bit Murmur3 hash +__forceinline__ __device__ size_t hash(uint64_t k, size_t N) { + k ^= k >> 33; + k *= 0xff51afd7ed558ccdULL; + k ^= k >> 33; + k *= 0xc4ceb9fe1a85ec53ULL; + k ^= k >> 33; + return k % N; +} + + +template +__forceinline__ __device__ void linear_probing_insert( + K* hashmap_keys, + V* hashmap_values, + const K key, + const V value, + const size_t N +) { + size_t slot = hash(key, N); + while (true) { + K prev = atomicCAS(&hashmap_keys[slot], std::numeric_limits::max(), key); + if (prev == std::numeric_limits::max() || prev == key) { + hashmap_values[slot] = value; + return; + } + slot = slot + 1; + if (slot >= N) slot = 0; + } +} + + +template +__forceinline__ __device__ void linear_probing_insert( + uint64_t* hashmap_keys, + V* hashmap_values, + const uint64_t key, + const V value, + const size_t N +) { + size_t slot = hash(key, N); + while (true) { + uint64_t prev = atomicCAS( + reinterpret_cast(&hashmap_keys[slot]), + static_cast(std::numeric_limits::max()), + static_cast(key) + ); + if (prev == std::numeric_limits::max() || prev == key) { + hashmap_values[slot] = value; + return; + } + slot = (slot + 1) % N; + } +} + + +template +__forceinline__ __device__ V linear_probing_lookup( + const K* hashmap_keys, + const V* hashmap_values, + const K key, + const size_t N +) { + size_t slot = hash(key, N); + while (true) { + K prev = hashmap_keys[slot]; + if (prev == std::numeric_limits::max()) { + return std::numeric_limits::max(); + } + if (prev == key) { + return hashmap_values[slot]; + } + 
slot = slot + 1; + if (slot >= N) slot = 0; + } +} diff --git a/o-voxel/src/io/api.h b/o-voxel/src/io/api.h new file mode 100644 index 0000000000000000000000000000000000000000..38c78095f6c7b11d23cccae5290aeabb0745d259 --- /dev/null +++ b/o-voxel/src/io/api.h @@ -0,0 +1,109 @@ +/* + * Efficient Sparse Voxel storage as Sparse Voxel Zip files (.svz) + * + * Copyright (C) 2025, Jianfeng XIANG + * All rights reserved. + * + * Licensed under The MIT License [see LICENSE for details] + * + * Written by Jianfeng XIANG + */ + +#pragma once +#include +#include + + +/** + * Encode a list of sparse voxel morton codes into a sparse voxel octree + * NOTE: The input indices must be sorted in ascending order + * + * @param codes [N] uint32 tensor containing the morton codes + * @param depth The depth of the sparse voxel octree + * + * @return uint8 tensor containing the sparse voxel octree + */ +torch::Tensor encode_sparse_voxel_octree_cpu( + const torch::Tensor& codes, + const uint32_t depth +); + + +/** + * Decode a sparse voxel octree into a list of sparse voxel morton codes + * + * @param octree uint8 tensor containing the sparse voxel octree + * @param depth The depth of the sparse voxel octree + * + * @return [N] uint32 tensor containing the morton codes + * The codes are sorted in ascending order + */ +torch::Tensor decode_sparse_voxel_octree_cpu( + const torch::Tensor& octree, + const uint32_t depth +); + + + +/** + * Encode the attribute of a sparse voxel octree into deltas from its parent node. 
+ * + * @param octree uint8 tensor containing the sparse voxel octree + * @param depth The depth of the sparse voxel octree + * @param attr [N, C] tensor containing the attribute of each sparse voxel + * + * @return uint8 tensor containing the deltas + */ +torch::Tensor encode_sparse_voxel_octree_attr_parent_cpu( + const torch::Tensor& octree, + const uint32_t depth, + const torch::Tensor& attr +); + + +/** + * Decode the attribute of a sparse voxel octree from its parent node and its deltas. + * + * @param octree uint8 tensor containing the sparse voxel octree + * @param depth The depth of the sparse voxel octree + * @param delta uint8 tensor containing the deltas + * + * @return [N, C] tensor containing the attribute of each sparse voxel + */ +torch::Tensor decode_sparse_voxel_octree_attr_parent_cpu( + const torch::Tensor& octree, + const uint32_t depth, + const torch::Tensor& delta +); + + +/** + * Encode the attribute of a sparse voxel octree into deltas from its neighbors. + * + * @param coord [N, 3] tensor containing the coordinates of each sparse voxel + * @param res The resolution of the sparse voxel grid + * @param attr [N, C] tensor containing the attribute of each sparse voxel + * + * @return uint8 tensor containing the deltas + */ +torch::Tensor encode_sparse_voxel_octree_attr_neighbor_cpu( + const torch::Tensor& coord, + const uint32_t res, + const torch::Tensor& attr +); + + +/** + * Decode the attribute of a sparse voxel octree from its neighbors and deltas. 
+ * + * @param coord [N, 3] tensor containing the coordinates of each sparse voxel + * @param res The resolution of the sparse voxel grid + * @param delta [N, C] tensor containing the deltas + * + * @return [N, C] tensor containing the attribute of each sparse voxel + */ +torch::Tensor decode_sparse_voxel_octree_attr_neighbor_cpu( + const torch::Tensor& coord, + const uint32_t res, + const torch::Tensor& delta +); diff --git a/o-voxel/src/io/filter_neighbor.cpp b/o-voxel/src/io/filter_neighbor.cpp new file mode 100644 index 0000000000000000000000000000000000000000..550988b4fec2ecd7bf5e7d23a8862dc38da4444e --- /dev/null +++ b/o-voxel/src/io/filter_neighbor.cpp @@ -0,0 +1,178 @@ +#include +#include "api.h" + +#include +#include +#include + + +/** + * Encode the attribute of a sparse voxel octree into deltas from its neighbors. + * + * @param coord [N, 3] tensor containing the coordinates of each sparse voxel + * @param res The resolution of the sparse voxel grid + * @param attr [N, C] tensor containing the attribute of each sparse voxel + * + * @return uint8 tensor containing the deltas + */ +torch::Tensor encode_sparse_voxel_octree_attr_neighbor_cpu( + const torch::Tensor& coord, + const uint32_t res, + const torch::Tensor& attr +) { + size_t N = coord.size(0); + size_t C = attr.size(1); + int* coord_data = coord.data_ptr(); + uint8_t* attr_data = attr.data_ptr(); + std::vector buffer(res * res * res * (C + 1), 0); + + // Densify the coordinates + for (int i = 0; i < N; i++) { + int x = coord_data[i * 3 + 0]; + int y = coord_data[i * 3 + 1]; + int z = coord_data[i * 3 + 2]; + int ptr = (z * res * res + y * res + x) * (C + 1); + buffer[ptr + C] = 1; + for (int c = 0; c < C; c++) { + buffer[ptr + c] = attr_data[i * C + c]; + } + } + + // Compute the deltas + for (int z = res-1; z >= 0; z--) { + for (int y = res-1; y >= 0; y--) { + for (int x = res-1; x >= 0; x--) { + int ptr = (z * res * res + y * res + x) * (C + 1); + int neignbor_ptr = -1; + int tmp_ptr; + if 
(!buffer[ptr + C]) continue; + // x + tmp_ptr = (z * res * res + y * res + (x - 1)) * (C + 1); + if (x > 0 && buffer[tmp_ptr + C]) neignbor_ptr = tmp_ptr; + // y + tmp_ptr = (z * res * res + (y - 1) * res + x) * (C + 1); + if (y > 0 && buffer[tmp_ptr + C]) neignbor_ptr = tmp_ptr; + // z + tmp_ptr = ((z - 1) * res * res + y * res + x) * (C + 1); + if (z > 0 && buffer[tmp_ptr + C]) neignbor_ptr = tmp_ptr; + // xy + tmp_ptr = (z * res * res + (y - 1) * res + (x - 1)) * (C + 1); + if (y > 0 && x > 0 && buffer[tmp_ptr + C]) neignbor_ptr = tmp_ptr; + // xz + tmp_ptr = ((z - 1) * res * res + y * res + (x - 1)) * (C + 1); + if (z > 0 && x > 0 && buffer[tmp_ptr + C]) neignbor_ptr = tmp_ptr; + // yz + tmp_ptr = ((z - 1) * res * res + (y - 1) * res + x) * (C + 1); + if (z > 0 && y > 0 && buffer[tmp_ptr + C]) neignbor_ptr = tmp_ptr; + // xyz + tmp_ptr = ((z - 1) * res * res + (y - 1) * res + (x - 1)) * (C + 1); + if (z > 0 && y > 0 && x > 0 && buffer[tmp_ptr + C]) neignbor_ptr = tmp_ptr; + if (neignbor_ptr >= 0) { + for (int c = 0; c < C; c++) { + buffer[ptr + c] -= buffer[neignbor_ptr + c]; + } + } + } + } + } + + // Pack the deltas into a uint8 tensor + torch::Tensor delta = torch::zeros({static_cast(N), static_cast(C)}, torch::dtype(torch::kUInt8)); + uint8_t* delta_data = delta.data_ptr(); + for (int i = 0; i < N; i++) { + int x = coord_data[i * 3 + 0]; + int y = coord_data[i * 3 + 1]; + int z = coord_data[i * 3 + 2]; + int ptr = (z * res * res + y * res + x) * (C + 1); + for (int c = 0; c < C; c++) { + delta_data[i * C + c] = buffer[ptr + c]; + } + } + return delta; +} + + +/** + * Decode the attribute of a sparse voxel octree from its neighbors and deltas. 
+ * + * @param coord [N, 3] tensor containing the coordinates of each sparse voxel + * @param res The resolution of the sparse voxel grid + * @param delta [N, C] tensor containing the deltas + * + * @return [N, C] tensor containing the attribute of each sparse voxel + */ +torch::Tensor decode_sparse_voxel_octree_attr_neighbor_cpu( + const torch::Tensor& coord, + const uint32_t res, + const torch::Tensor& delta +) { + size_t N = coord.size(0); + size_t C = delta.size(1); + int* coord_data = coord.data_ptr(); + uint8_t* delta_data = delta.data_ptr(); + std::vector buffer(res * res * res * (C + 1), 0); + + // Densify the coordinates + for (int i = 0; i < N; i++) { + int x = coord_data[i * 3 + 0]; + int y = coord_data[i * 3 + 1]; + int z = coord_data[i * 3 + 2]; + int ptr = (z * res * res + y * res + x) * (C + 1); + buffer[ptr + C] = 1; + for (int c = 0; c < C; c++) { + buffer[ptr + c] = delta_data[i * C + c]; + } + } + + // Reconstruct the attribute + for (int z = 0; z < res; z++) { + for (int y = 0; y < res; y++) { + for (int x = 0; x < res; x++) { + int ptr = (z * res * res + y * res + x) * (C + 1); + int neignbor_ptr = -1; + int tmp_ptr; + if (!buffer[ptr + C]) continue; + // x + tmp_ptr = (z * res * res + y * res + (x - 1)) * (C + 1); + if (x > 0 && buffer[tmp_ptr + C]) neignbor_ptr = tmp_ptr; + // y + tmp_ptr = (z * res * res + (y - 1) * res + x) * (C + 1); + if (y > 0 && buffer[tmp_ptr + C]) neignbor_ptr = tmp_ptr; + // z + tmp_ptr = ((z - 1) * res * res + y * res + x) * (C + 1); + if (z > 0 && buffer[tmp_ptr + C]) neignbor_ptr = tmp_ptr; + // xy + tmp_ptr = (z * res * res + (y - 1) * res + (x - 1)) * (C + 1); + if (y > 0 && x > 0 && buffer[tmp_ptr + C]) neignbor_ptr = tmp_ptr; + // xz + tmp_ptr = ((z - 1) * res * res + y * res + (x - 1)) * (C + 1); + if (z > 0 && x > 0 && buffer[tmp_ptr + C]) neignbor_ptr = tmp_ptr; + // yz + tmp_ptr = ((z - 1) * res * res + (y - 1) * res + x) * (C + 1); + if (z > 0 && y > 0 && buffer[tmp_ptr + C]) neignbor_ptr = tmp_ptr; + // 
xyz + tmp_ptr = ((z - 1) * res * res + (y - 1) * res + (x - 1)) * (C + 1); + if (z > 0 && y > 0 && x > 0 && buffer[tmp_ptr + C]) neignbor_ptr = tmp_ptr; + if (neignbor_ptr >= 0) { + for (int c = 0; c < C; c++) { + buffer[ptr + c] += buffer[neignbor_ptr + c]; + } + } + } + } + } + + // Pack the attribute into a uint8 tensor + torch::Tensor attr = torch::zeros({static_cast(N), static_cast(C)}, torch::dtype(torch::kUInt8)); + uint8_t* attr_data = attr.data_ptr(); + for (int i = 0; i < N; i++) { + int x = coord_data[i * 3 + 0]; + int y = coord_data[i * 3 + 1]; + int z = coord_data[i * 3 + 2]; + int ptr = (z * res * res + y * res + x) * (C + 1); + for (int c = 0; c < C; c++) { + attr_data[i * C + c] = buffer[ptr + c]; + } + } + return attr; +} diff --git a/o-voxel/src/io/filter_parent.cpp b/o-voxel/src/io/filter_parent.cpp new file mode 100644 index 0000000000000000000000000000000000000000..a392d781a56ff3e756bd0188c963107468d4cb04 --- /dev/null +++ b/o-voxel/src/io/filter_parent.cpp @@ -0,0 +1,165 @@ +#include +#include "api.h" +#include "lut.h" + +#include +#include +#include + + +std::vector encode_recursive( + const uint8_t* svo, + const uint32_t depth, + const uint8_t* attr, + const size_t C, + uint32_t& svo_ptr, + uint32_t& attr_ptr, + uint32_t& delta_ptr, + uint32_t self_delta_ptr, + uint32_t cur_depth, + uint8_t* delta +) { + std::vector node_attr(C, 0); + if (cur_depth == depth) { + // Leaf node + for (size_t i = 0; i < C; i++) { + node_attr[i] = attr[attr_ptr + i]; + if (self_delta_ptr != 0 || cur_depth == 0) { + delta[self_delta_ptr + i] = node_attr[i]; + } + } + attr_ptr += C; + } + else { + // Internal node + uint8_t node = svo[svo_ptr]; + uint32_t child_delta_ptr = delta_ptr; + uint8_t cnt = lut_1cnt[node]; + svo_ptr++; + delta_ptr += C * (cnt - 1); + for (uint8_t i = 0; i < cnt; i++) { + auto child_attr = encode_recursive( + svo, depth, attr, C, svo_ptr, attr_ptr, delta_ptr, i == cnt-1 ? 
0 : child_delta_ptr+i*C, cur_depth+1, delta + ); + for (size_t j = 0; j < C; j++) { + if (i == 0) { + node_attr[j] = child_attr[j]; + } + else { + delta[child_delta_ptr + (i-1)*C + j] = child_attr[j] - delta[child_delta_ptr + (i-1)*C + j]; + } + } + } + if (self_delta_ptr != 0 || cur_depth == 0) { + for (size_t i = 0; i < C; i++) { + delta[self_delta_ptr + i] = node_attr[i]; + } + } + } + return node_attr; +} + + +/** + * Encode the attribute of a sparse voxel octree into deltas from its parent node. + * + * @param octree uint8 tensor containing the sparse voxel octree + * @param depth The depth of the sparse voxel octree + * @param attr [N, C] tensor containing the attribute of each sparse voxel + * + * @return uint8 tensor containing the deltas + */ +torch::Tensor encode_sparse_voxel_octree_attr_parent_cpu( + const torch::Tensor& octree, + const uint32_t depth, + const torch::Tensor& attr +) { + size_t N_leaf = attr.size(0); + size_t N_node = octree.size(0); + size_t C = attr.size(1); + uint8_t* octree_data = octree.data_ptr(); + uint8_t* attr_data = attr.data_ptr(); + + torch::Tensor delta = torch::zeros({static_cast(N_leaf), static_cast(C)}, torch::kUInt8); + uint32_t svo_ptr = 0; + uint32_t attr_ptr = 0; + uint32_t delta_ptr = static_cast(C); + encode_recursive(octree_data, depth, attr_data, C, svo_ptr, attr_ptr, delta_ptr, 0, 0, delta.data_ptr()); + + return delta; +} + + +void decode_recursive( + const uint8_t* svo, + const uint32_t depth, + const uint8_t* delta, + const size_t C, + uint32_t& svo_ptr, + uint32_t& attr_ptr, + uint32_t& delta_ptr, + uint32_t cur_depth, + uint8_t* cur_attr, + uint8_t* attr +) { + if (cur_depth == depth) { + // Leaf node + for (size_t i = 0; i < C; i++) { + attr[attr_ptr + i] = cur_attr[i]; + } + attr_ptr += C; + } + else { + // Internal node + uint8_t node = svo[svo_ptr]; + uint32_t child_delta_ptr = delta_ptr; + std::vector child_attr(cur_attr, cur_attr + C); + uint8_t cnt = lut_1cnt[node]; + svo_ptr++; + delta_ptr += C * (cnt 
/**
 * Population-count lookup table: lut_1cnt[b] is the number of set bits in the
 * byte value b (0..255).
 *
 * Generated with:
 *   np.array([bin(i).count('1') for i in range(256)], dtype=np.uint8)
 *
 * Declared `static constexpr` so the table is read-only and has internal
 * linkage. A plain (mutable, external-linkage) definition in a header breaks
 * the one-definition rule as soon as two translation units include it.
 */
static constexpr uint8_t lut_1cnt[256] = {
    // 0x00 - 0x3F
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    // 0x40 - 0x7F
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    // 0x80 - 0xBF
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    // 0xC0 - 0xFF
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
};
if (insert_stack[insert_from] != stack[insert_from]) { + break; + } + } + } + + // Insert new nodes from insert_from to depth-1 + for (uint32_t j = insert_from; j < depth; j++) { + // Add new node to SVO + if (j > insert_from) { + svo.push_back(0); + stack_ptr[j] = svo.size()-1; + } + // Update parent pointers + svo[stack_ptr[j]] |= (1 << insert_stack[j]); + // Update stack + if (j < depth-1) { + stack[j] = insert_stack[j]; + } + } + } + + // Convert SVO to tensor + torch::Tensor svo_tensor = torch::from_blob(svo.data(), {static_cast(svo.size())}, torch::kUInt8).clone(); + return svo_tensor; +} + + +void decode_sparse_voxel_octree_cpu_recursive( + const uint8_t* svo, + const uint32_t depth, + uint32_t& ptr, + std::vector& stack, + std::vector& codes +) { + uint8_t node = svo[ptr]; + if (stack.size() == depth-1) { + // Leaf node, add code to list + uint32_t code = 0; + for (uint32_t i = 0; i < depth-1; i++) { + code |= (static_cast(stack[i]) << (3*(depth-1-i))); + } + for (uint8_t i = 0; i < 8; i++) { + if (node & (1 << i)) { + code = (code & ~0x7) | i; + codes.push_back(code); + } + } + ptr++; + } + else { + // Internal node, recurse + ptr++; + for (uint8_t i = 0; i < 8; i++) { + if (node & (1 << i)) { + stack.push_back(i); + decode_sparse_voxel_octree_cpu_recursive(svo, depth, ptr, stack, codes); + stack.pop_back(); + } + } + } +} + + +/** + * Decode a sparse voxel octree into a list of sparse voxel morton codes + * + * @param octree uint8 tensor containing the sparse voxel octree + * @param depth The depth of the sparse voxel octree + * + * @return [N] uint32 tensor containing the morton codes + * The codes are sorted in ascending order + */ +torch::Tensor decode_sparse_voxel_octree_cpu( + const torch::Tensor& octree, + const uint32_t depth +) { + uint8_t* octree_data = octree.data_ptr(); + std::vector codes; + std::vector stack; + stack.reserve(depth-2); + uint32_t ptr = 0; + // Decode SVO into list of codes + 
decode_sparse_voxel_octree_cpu_recursive(octree_data, depth, ptr, stack, codes); + // Convert codes to tensor + torch::Tensor codes_tensor = torch::from_blob(codes.data(), {static_cast(codes.size())}, torch::kInt32).clone(); + return codes_tensor; +} diff --git a/o-voxel/src/rasterize/api.h b/o-voxel/src/rasterize/api.h new file mode 100644 index 0000000000000000000000000000000000000000..03d640547e7d3b2fa4d48081e9191790c85e87a6 --- /dev/null +++ b/o-voxel/src/rasterize/api.h @@ -0,0 +1,47 @@ +/* + * Sparse Voxel Rasterizer + * + * Copyright (C) 2025, Jianfeng XIANG + * All rights reserved. + * + * Licensed under The MIT License [see LICENSE for details] + * + * Written by Jianfeng XIANG + */ + +#pragma once +#include + + +/** + * Rasterize a sparse voxel octree with CUDA backend + * + * @param positions Tensor of shape (N, 3) containing the positions of the octree nodes in [0, 1]^3 + * @param attrs Tensor of shape (N, 1) containing the attributes of the octree nodes + * @param voxel_size Float containing the size of the voxels + * @param viewmatrix Tensor of shape (4, 4) containing the view matrix + * @param projmatrix Tensor of shape (4, 4) containing the projection matrix + * @param campos Tensor of shape (3) containing the camera position + * @param tan_fovx Float containing the tangent of the horizontal field of view + * @param tan_fovy Float containing the tangent of the vertical field of view + * @param image_height Integer containing the image height + * @param image_width Integer containing the image width + * + * @return A tuple containing: + * - Tensor of shape (C, H, W) containing the output color + * - Tensor of shape (H, W) containing the output depth + * - Tensor of shape (H, W) containing the output alpha + */ +std::tuple +rasterize_voxels_cuda( + const torch::Tensor& positions, + const torch::Tensor& attrs, + const float voxel_size, + const torch::Tensor& viewmatrix, + const torch::Tensor& projmatrix, + const torch::Tensor& campos, + const float 
tan_fovx, + const float tan_fovy, + const int image_height, + const int image_width +); diff --git a/o-voxel/src/rasterize/auxiliary.h b/o-voxel/src/rasterize/auxiliary.h new file mode 100644 index 0000000000000000000000000000000000000000..260b19f1f1b493fd86dbce4b1ed3fd58b76c009c --- /dev/null +++ b/o-voxel/src/rasterize/auxiliary.h @@ -0,0 +1,285 @@ +#pragma once +#include "config.h" + + +#define BLOCK_SIZE (BLOCK_X * BLOCK_Y) + + +__forceinline__ __device__ float ndc2Pix(float v, int S) +{ + return ((v + 1.0) * S - 1.0) * 0.5; +} + + +__forceinline__ __device__ void getRect(const int4 bbox, uint2& rect_min, uint2& rect_max, dim3 grid) +{ + rect_min = { + min(grid.x, max((int)0, (int)((bbox.x) / BLOCK_X))), + min(grid.y, max((int)0, (int)((bbox.y) / BLOCK_Y))) + }; + rect_max = { + min(grid.x, max((int)0, (int)((bbox.z + BLOCK_X - 1) / BLOCK_X))), + min(grid.y, max((int)0, (int)((bbox.w + BLOCK_Y - 1) / BLOCK_Y))) + }; +} + + +__forceinline__ __device__ float3 normalize(const float3& v) +{ + float inv_norm = 1.0f / sqrt(v.x * v.x + v.y * v.y + v.z * v.z); + return { v.x * inv_norm, v.y * inv_norm, v.z * inv_norm }; +} + + +__forceinline__ __device__ float3 transformPoint4x3(const float3& p, const float* matrix) +{ + float3 transformed = { + matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12], + matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13], + matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14], + }; + return transformed; +} + + +__forceinline__ __device__ float4 transformPoint4x4(const float3& p, const float* matrix) +{ + float4 transformed = { + matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12], + matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13], + matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z + matrix[14], + matrix[3] * p.x + matrix[7] * p.y + matrix[11] * p.z + matrix[15] + }; + return transformed; +} + + +__forceinline__ __device__ float3 transformVec4x3(const float3& p, const 
float* matrix) +{ + float3 transformed = { + matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z, + matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z, + matrix[2] * p.x + matrix[6] * p.y + matrix[10] * p.z, + }; + return transformed; +} + + +__forceinline__ __device__ float3 transformVec4x3Transpose(const float3& p, const float* matrix) +{ + float3 transformed = { + matrix[0] * p.x + matrix[1] * p.y + matrix[2] * p.z, + matrix[4] * p.x + matrix[5] * p.y + matrix[6] * p.z, + matrix[8] * p.x + matrix[9] * p.y + matrix[10] * p.z, + }; + return transformed; +} + + +__forceinline__ __device__ float dnormvdz(float3 v, float3 dv) +{ + float sum2 = v.x * v.x + v.y * v.y + v.z * v.z; + float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); + float dnormvdz = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32; + return dnormvdz; +} + + +__forceinline__ __device__ float3 dnormvdv(float3 v, float3 dv) +{ + float sum2 = v.x * v.x + v.y * v.y + v.z * v.z; + float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); + + float3 dnormvdv; + dnormvdv.x = ((+sum2 - v.x * v.x) * dv.x - v.y * v.x * dv.y - v.z * v.x * dv.z) * invsum32; + dnormvdv.y = (-v.x * v.y * dv.x + (sum2 - v.y * v.y) * dv.y - v.z * v.y * dv.z) * invsum32; + dnormvdv.z = (-v.x * v.z * dv.x - v.y * v.z * dv.y + (sum2 - v.z * v.z) * dv.z) * invsum32; + return dnormvdv; +} + + +__forceinline__ __device__ float4 dnormvdv(float4 v, float4 dv) +{ + float sum2 = v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w; + float invsum32 = 1.0f / sqrt(sum2 * sum2 * sum2); + + float4 vdv = { v.x * dv.x, v.y * dv.y, v.z * dv.z, v.w * dv.w }; + float vdv_sum = vdv.x + vdv.y + vdv.z + vdv.w; + float4 dnormvdv; + dnormvdv.x = ((sum2 - v.x * v.x) * dv.x - v.x * (vdv_sum - vdv.x)) * invsum32; + dnormvdv.y = ((sum2 - v.y * v.y) * dv.y - v.y * (vdv_sum - vdv.y)) * invsum32; + dnormvdv.z = ((sum2 - v.z * v.z) * dv.z - v.z * (vdv_sum - vdv.z)) * invsum32; + dnormvdv.w = ((sum2 - v.w * v.w) * dv.w - v.w * (vdv_sum - vdv.w)) * 
invsum32; + return dnormvdv; +} + + +__forceinline__ __device__ float sigmoid(float x) +{ + return 1.0f / (1.0f + expf(-x)); +} + + +__forceinline__ __device__ bool in_frustum(int idx, + const float3& p_orig, + const float* viewmatrix, + const float* projmatrix, + float3& p_view) +{ + // Bring points to screen space + float4 p_hom = transformPoint4x4(p_orig, projmatrix); + float p_w = 1.0f / (p_hom.w + 0.0000001f); + float3 p_proj = { p_hom.x * p_w, p_hom.y * p_w, p_hom.z * p_w }; + p_view = transformPoint4x3(p_orig, viewmatrix); + + if (p_view.z <= 0.2f)// || ((p_proj.x < -1.3 || p_proj.x > 1.3 || p_proj.y < -1.3 || p_proj.y > 1.3))) + { + return false; + } + return true; +} + + +__forceinline__ __device__ uint32_t expandBits(uint32_t v) +{ + v = (v * 0x00010001u) & 0xFF0000FFu; + v = (v * 0x00000101u) & 0x0F00F00Fu; + v = (v * 0x00000011u) & 0xC30C30C3u; + v = (v * 0x00000005u) & 0x49249249u; + return v; +} + + +__forceinline__ __device__ int2 project(const float3& p, const float* matrix, const int& width, const int& height) +{ + float3 p_hom = { + matrix[0] * p.x + matrix[4] * p.y + matrix[8] * p.z + matrix[12], + matrix[1] * p.x + matrix[5] * p.y + matrix[9] * p.z + matrix[13], + matrix[3] * p.x + matrix[7] * p.y + matrix[11] * p.z + matrix[15] + }; + float p_w = 1.0f / (p_hom.z + 0.0000001f); + return { (int)((p_hom.x * p_w + 1.0f) * 0.5f * width), (int)((p_hom.y * p_w + 1.0f) * 0.5f * height) }; +} + + +#define GET_BBOX_FIRST(A, B, C) \ +vertex.x = point.x A half_scale.x; \ +vertex.y = point.y B half_scale.y; \ +vertex.z = point.z C half_scale.z; \ +p_screen = project(vertex, projmatrix, width, height); \ +bbox.x = p_screen.x; \ +bbox.y = p_screen.y; \ +bbox.z = p_screen.x + 1; \ +bbox.w = p_screen.y + 1; + +#define GET_BBOX_OTHER(A, B, C) \ +vertex.x = point.x A half_scale.x; \ +vertex.y = point.y B half_scale.y; \ +vertex.z = point.z C half_scale.z; \ +p_screen = project(vertex, projmatrix, width, height); \ +bbox.x = min(bbox.x, p_screen.x); \ +bbox.y = 
min(bbox.y, p_screen.y); \ +bbox.z = max(bbox.z, p_screen.x + 1); \ +bbox.w = max(bbox.w, p_screen.y + 1); + + +__forceinline__ __device__ int4 get_bbox( + const float3& point, + const float3& scale, + const float* projmatrix, + const int& width, + const int& height +) { + float3 half_scale = { scale.x * 0.5f, scale.y * 0.5f, scale.z * 0.5f }; + float3 vertex; + int2 p_screen; + int4 bbox; + + GET_BBOX_FIRST(-, -, -); + GET_BBOX_OTHER(+, -, -); + GET_BBOX_OTHER(-, +, -); + GET_BBOX_OTHER(+, +, -); + GET_BBOX_OTHER(-, -, +); + GET_BBOX_OTHER(+, -, +); + GET_BBOX_OTHER(-, +, +); + GET_BBOX_OTHER(+, +, +); + + bbox.x = max(0, bbox.x); + bbox.y = max(0, bbox.y); + bbox.z = min(width, bbox.z); + bbox.w = min(height, bbox.w); + if (bbox.x >= bbox.z || bbox.y >= bbox.w) // bbox is empty + return { 0, 0, 0, 0 }; + return bbox; +} + + +// Fast ray-box intersection, returns the intersection distance +__forceinline__ __device__ float2 get_ray_voxel_intersection( + const float3& ray_origin, + const float3& ray_direction, + const float3& voxel_min, + const float3& voxel_max +) { + // Careful with the division by zero + float3 inv_direction; + inv_direction.x = ray_direction.x == 0.0f ? 1e10f : 1.0f / ray_direction.x; + inv_direction.y = ray_direction.y == 0.0f ? 1e10f : 1.0f / ray_direction.y; + inv_direction.z = ray_direction.z == 0.0f ? 
1e10f : 1.0f / ray_direction.z; + float3 t0 = { + (voxel_min.x - ray_origin.x) * inv_direction.x, + (voxel_min.y - ray_origin.y) * inv_direction.y, + (voxel_min.z - ray_origin.z) * inv_direction.z + }; + float3 t1 = { + (voxel_max.x - ray_origin.x) * inv_direction.x, + (voxel_max.y - ray_origin.y) * inv_direction.y, + (voxel_max.z - ray_origin.z) * inv_direction.z + }; + float3 tmin = { + min(t0.x, t1.x), + min(t0.y, t1.y), + min(t0.z, t1.z) + }; + float3 tmax = { + max(t0.x, t1.x), + max(t0.y, t1.y), + max(t0.z, t1.z) + }; + float tmin_max = max(tmin.x, max(tmin.y, tmin.z)); + float tmax_min = min(tmax.x, min(tmax.y, tmax.z)); + return { tmin_max, tmax_min }; +} + + +__forceinline__ __device__ float3 getRayDir( + const uint2& pix, + const int& width, + const int& height, + const float& tan_fovx, + const float& tan_fovy, + const float* viewmatrix +) { + float x = (2.0f * (pix.x + 0.5f) / width - 1.0f) * tan_fovx; + float y = (2.0f * (pix.y + 0.5f) / height - 1.0f) * tan_fovy; + float3 ray_dir = { + viewmatrix[0] * x + viewmatrix[1] * y + viewmatrix[2], + viewmatrix[4] * x + viewmatrix[5] * y + viewmatrix[6], + viewmatrix[8] * x + viewmatrix[9] * y + viewmatrix[10] + }; + return normalize(ray_dir); +} + + +#ifdef DEBUG +#define CHECK_CUDA(...) __VA_ARGS__; {\ +auto ret = cudaDeviceSynchronize(); \ +if (ret != cudaSuccess) { \ +std::cerr << "\n[CUDA ERROR] in " << __FILE__ << "\nLine " << __LINE__ << ": " << cudaGetErrorString(ret); \ +throw std::runtime_error(cudaGetErrorString(ret)); \ +}} +#define DEBUG_PRINT(...) printf(__VA_ARGS__) +#else +#define CHECK_CUDA(...) __VA_ARGS__ +#define DEBUG_PRINT(...) 
+#endif \ No newline at end of file diff --git a/o-voxel/src/rasterize/config.h b/o-voxel/src/rasterize/config.h new file mode 100644 index 0000000000000000000000000000000000000000..7e931a03f9be89a27418fe0f25eab00b5b368390 --- /dev/null +++ b/o-voxel/src/rasterize/config.h @@ -0,0 +1,5 @@ +#pragma once + +#define BLOCK_X 8 +#define BLOCK_Y 8 +#define MEM_ALIGNMENT 128 diff --git a/o-voxel/src/rasterize/rasterize.cu b/o-voxel/src/rasterize/rasterize.cu new file mode 100644 index 0000000000000000000000000000000000000000..c616a88cb6b7db7f152e8df6b0f040a8d6b27a25 --- /dev/null +++ b/o-voxel/src/rasterize/rasterize.cu @@ -0,0 +1,396 @@ +#include + +#include +#include "cuda_runtime.h" + +#include +namespace cg = cooperative_groups; + +#include "config.h" +#include "auxiliary.h" +#include "api.h" + + +/** + * Preprocess input 3D points + */ +static __global__ void preprocess( + const int num_nodes, + const float* positions, + const float voxel_size, + const float* viewmatrix, + const float* projmatrix, + const int width, + const int height, + const dim3 grid, + int4* bboxes, + float* depths, + uint32_t* tiles_touched +) { + auto idx = cg::this_grid().thread_rank(); + if (idx >= num_nodes) + return; + + // Initialize bboxes and touched tiles to 0. If this isn't changed, + // this voxel will not be processed further. + bboxes[idx] = { 0, 0, 0, 0 }; + tiles_touched[idx] = 0; + + // Perform near culling, quit if outside. + float3 p_orig = { + positions[3 * idx], + positions[3 * idx + 1], + positions[3 * idx + 2] + }; + float3 p_view; + if (!in_frustum(idx, p_orig, viewmatrix, projmatrix, p_view)) + return; + + // Project 8 vertices of the voxel to screen space to find the + // bounding box of the projected points. 
+ float3 scale = { voxel_size, voxel_size, voxel_size }; + int4 bbox = get_bbox(p_orig, scale, projmatrix, width, height); + uint2 rect_min, rect_max; + getRect(bbox, rect_min, rect_max, grid); + if ((rect_max.x - rect_min.x) * (rect_max.y - rect_min.y) == 0) + return; + + // Store some useful helper data for the next steps. + depths[idx] = p_view.z; + bboxes[idx] = bbox; + tiles_touched[idx] = (rect_max.y - rect_min.y) * (rect_max.x - rect_min.x); +} + + +/** + * Generates one key/value pair for all voxel / tile overlaps. + * Run once per voxel (1:N mapping). + * + * @param P Number of points. + * @param grid Grid size. + * @param depths Depths of points. + * @param offsets Offsets for writing keys/values. + * @param bboxes Bounding boxes of voxels. + * @param keys_unsorted Unsorted keys. + * @param values_unsorted Unsorted values. + */ +static __global__ void duplicateWithKeys( + int P, dim3 grid, + const float* depths, + const int64_t* offsets, + const int4* bboxes, + int64_t* keys_unsorted, + uint32_t* values_unsorted +) { + auto idx = cg::this_grid().thread_rank(); + if (idx >= P) + return; + + // Generate no key/value pair for invisible voxels + if (bboxes[idx].w > 0) + { + // Find this voxel's offset in buffer for writing keys/values. + int64_t off = (idx == 0) ? 0 : offsets[idx - 1]; + uint2 rect_min, rect_max; + getRect(bboxes[idx], rect_min, rect_max, grid); + + // For each tile that the bounding rect overlaps, emit a + // key/value pair. The key is | tile ID | depth |, + // and the value is the ID of the voxel. Sorting the values + // with this key yields voxel IDs in a list, such that they + // are first sorted by tile and then by depth. 
+ for (int y = rect_min.y; y < rect_max.y; y++) + { + for (int x = rect_min.x; x < rect_max.x; x++) + { + int64_t key = y * grid.x + x; + key <<= 32; + key |= *((uint32_t*)&depths[idx]); + keys_unsorted[off] = key; + values_unsorted[off] = idx; + off++; + } + } + } +} + + +/** + * Check keys to see if it is at the start/end of one tile's range in the full sorted list. If yes, write start/end of this tile. + * + * @param L Number of points. + * @param point_list_keys List of keys. + * @param ranges Ranges of tiles. + */ +static __global__ void identifyTileRanges(int L, int64_t* point_list_keys, uint2* ranges) +{ + auto idx = cg::this_grid().thread_rank(); + if (idx >= L) + return; + + // Read tile ID from key. Update start/end of tile range if at limit. + int64_t key = point_list_keys[idx]; + uint32_t currtile = key >> 32; + if (idx == 0) + ranges[currtile].x = 0; + else + { + uint32_t prevtile = point_list_keys[idx - 1] >> 32; + if (currtile != prevtile) + { + ranges[prevtile].y = idx; + ranges[currtile].x = idx; + } + } + if (idx == L - 1) + ranges[currtile].y = L; +} + + +/** + * Main rasterization method. Collaboratively works on one tile per + * block, each thread treats one pixel. Alternates between fetching + * and rasterizing data. + * + * @param ranges Ranges of voxel instances for each tile. + * @param point_list List of voxel instances. + * @param C Number of channels. + * @param W Width of the image. + * @param H Height of the image. + * @param cam_pos Camera position. + * @param tan_fovx Tangent of the horizontal field of view. + * @param tan_fovy Tangent of the vertical field of view. + * @param viewmatrix View matrix. + * @param positions Centers of voxels. + * @param attrs Attributes of voxels. + * @param voxel_size Size of voxels. + * @param out_color Output color. + * @param out_depth Output depth. + * @param out_alpha Output alpha. 
+ */ +static __global__ void __launch_bounds__(BLOCK_X * BLOCK_Y) +render( + const uint2* ranges, + const uint32_t* point_list, + const int C, + const int W, + const int H, + const float* cam_pos, + const float tan_fovx, + const float tan_fovy, + const float* viewmatrix, + const float* positions, + const float* attrs, + const float voxel_size, + float* out_color, + float* out_depth, + float* out_alpha +) { + // Identify current tile and associated min/max pixel range. + auto block = cg::this_thread_block(); + uint32_t horizontal_blocks = (W + BLOCK_X - 1) / BLOCK_X; + uint2 pix_min = { block.group_index().x * BLOCK_X, block.group_index().y * BLOCK_Y }; + uint2 pix_max = { min(pix_min.x + BLOCK_X, W), min(pix_min.y + BLOCK_Y , H) }; + uint2 pix = { pix_min.x + block.thread_index().x, pix_min.y + block.thread_index().y }; + uint32_t pix_id = W * pix.y + pix.x; + + // Get ray direction and origin for this pixel. + float3 ray_dir = getRayDir(pix, W, H, tan_fovx, tan_fovy, viewmatrix); + + // Check if this thread is associated with a valid pixel or outside. + bool inside = pix.x < W&& pix.y < H; + // Done threads can help with fetching, but don't rasterize + bool done = !inside; + + // Load start/end range of IDs to process in bit sorted list. + uint2 range = ranges[block.group_index().y * horizontal_blocks + block.group_index().x]; + const int rounds = ((range.y - range.x + BLOCK_SIZE - 1) / BLOCK_SIZE); + int toDo = range.y - range.x; + + // Allocate storage for batches of collectively fetched data. 
+ __shared__ int collected_id[BLOCK_SIZE]; + __shared__ float3 collected_xyz[BLOCK_SIZE]; + + // Initialize helper variables + int hit = -1; + float D; + + // Iterate over batches until all done or range is complete + for (int i = 0; i < rounds; i++, toDo -= BLOCK_SIZE) + { + // End if entire block votes that it is done rasterizing + int num_done = __syncthreads_count(done); + if (num_done == BLOCK_SIZE) + break; + + // Collectively fetch per-voxel data from global to shared + int progress = i * BLOCK_SIZE + block.thread_rank(); + if (range.x + progress < range.y) + { + int coll_id = point_list[range.x + progress]; + collected_id[block.thread_rank()] = coll_id; + collected_xyz[block.thread_rank()] = { + positions[3 * coll_id], + positions[3 * coll_id + 1], + positions[3 * coll_id + 2] + }; + } + block.sync(); + + // Iterate over current batch + for (int j = 0; !done && j < min(BLOCK_SIZE, toDo); j++) + { + // Get ray-voxel intersection + float3 p = collected_xyz[j]; + float3 scale = { voxel_size, voxel_size, voxel_size }; + float3 voxel_min = { p.x - 0.5f * scale.x, p.y - 0.5f * scale.y, p.z - 0.5f * scale.z }; + float3 voxel_max = { p.x + 0.5f * scale.x, p.y + 0.5f * scale.y, p.z + 0.5f * scale.z }; + float2 itsc = get_ray_voxel_intersection(*(float3*)cam_pos, ray_dir, voxel_min, voxel_max); + float itsc_dist = (itsc.y >= itsc.x) ? itsc.y - itsc.x : -1.0f; + if (itsc_dist <= 0.0f) + continue; + + hit = collected_id[j]; + D = itsc.x; + done = true; + } + } + + // All threads that treat valid pixel write out their final + // rendering data to the frame and auxiliary buffers. + if (inside) + { + for (int ch = 0; ch < C; ch++) + if (hit >= 0) out_color[ch * H * W + pix_id] = attrs[hit * C + ch]; + out_depth[pix_id] = D; + out_alpha[pix_id] = hit >= 0 ? 
1.0f : 0.0f; + } +} + +void forward( + const int num_nodes, + const int num_channels, + const int width, + const int height, + const float* positions, + const float* attrs, + const float voxel_size, + const float* viewmatrix, + const float* projmatrix, + const float* campos, + const float tan_fovx, + const float tan_fovy, + float* out_color, + float* out_depth, + float* out_alpha +) { + // Parrallel config (2D grid of 2D blocks) + dim3 grid((width + BLOCK_X - 1) / BLOCK_X, (height + BLOCK_Y - 1) / BLOCK_Y, 1); + dim3 block(BLOCK_X, BLOCK_Y, 1); + + // Run preprocessing kernel + auto pt_bboxes = torch::zeros({num_nodes, 4}, torch::TensorOptions().dtype(torch::kInt32).device(torch::kCUDA)); + auto pt_depths = torch::zeros({num_nodes}, torch::TensorOptions().dtype(torch::kFloat32).device(torch::kCUDA)); + auto pt_tiles_touched = torch::zeros({num_nodes}, torch::TensorOptions().dtype(torch::kInt32).device(torch::kCUDA)); + preprocess<<<(num_nodes+255)/256, 256>>>( + num_nodes, positions, voxel_size, viewmatrix, projmatrix, width, height, grid, + reinterpret_cast(pt_bboxes.data_ptr()), + pt_depths.data_ptr(), + reinterpret_cast(pt_tiles_touched.data_ptr()) + ); + + // Compute prefix sum over full list of touched tile counts by voxels + // E.g., [2, 3, 0, 2, 1] -> [2, 5, 5, 7, 8] + auto pt_offsets = torch::cumsum(pt_tiles_touched, 0); + + // Retrieve total number of voxel instances to launch + int num_rendered = pt_offsets[num_nodes - 1].item(); + if (num_rendered == 0) return; + + // For each instance to be rendered, produce adequate [ tile | depth ] key + auto pt_keys_unsorted = torch::zeros({num_rendered}, torch::TensorOptions().dtype(torch::kInt64).device(torch::kCUDA)); + auto pt_indices_unsorted = torch::zeros({num_rendered}, torch::TensorOptions().dtype(torch::kInt32).device(torch::kCUDA)); + duplicateWithKeys<<<(num_nodes+255)/256, 256>>>( + num_nodes, grid, + pt_depths.data_ptr(), + pt_offsets.data_ptr(), + reinterpret_cast(pt_bboxes.data_ptr()), + 
pt_keys_unsorted.data_ptr(), + reinterpret_cast(pt_indices_unsorted.data_ptr()) + ); + + // Sort complete list of (duplicated) voxel indices by keys + auto pt_sorted = torch::sort(pt_keys_unsorted, 0); + auto pt_keys = std::get<0>(pt_sorted); + auto pt_order = std::get<1>(pt_sorted); + auto pt_indices = torch::index_select(pt_indices_unsorted, 0, pt_order); + + // Identify start and end of per-tile workloads in sorted list + auto tile_ranges = torch::zeros({grid.x * grid.y, 2}, torch::TensorOptions().dtype(torch::kInt32).device(torch::kCUDA)); + identifyTileRanges<<<(num_rendered+255)/256, 256>>>( + num_rendered, + pt_keys.data_ptr(), + reinterpret_cast(tile_ranges.data_ptr()) + ); + + // Let each tile blend its range of voxels independently in parallel + render<<>>( + reinterpret_cast(tile_ranges.data_ptr()), + reinterpret_cast(pt_indices.data_ptr()), + num_channels, width, height, + campos, tan_fovx, tan_fovy, viewmatrix, + positions, attrs, voxel_size, + out_color, out_depth, out_alpha + ); +} + + +std::tuple +rasterize_voxels_cuda( + const torch::Tensor& positions, + const torch::Tensor& attrs, + const float voxel_size, + const torch::Tensor& viewmatrix, + const torch::Tensor& projmatrix, + const torch::Tensor& campos, + const float tan_fovx, + const float tan_fovy, + const int image_height, + const int image_width +) { + // Sizes + const int P = positions.size(0); + const int C = attrs.size(1); + const int H = image_height; + const int W = image_width; + + // Types + torch::TensorOptions float_opts = torch::TensorOptions().dtype(torch::kFloat32).device(positions.device()); + torch::TensorOptions byte_opts = torch::TensorOptions().dtype(torch::kUInt8).device(positions.device()); + + // Allocate output tensors + torch::Tensor out_color = torch::zeros({C, H, W}, float_opts); + torch::Tensor out_depth = torch::zeros({H, W}, float_opts); + torch::Tensor out_alpha = torch::zeros({H, W}, float_opts); + + // Call Forward + if (P > 0) { + forward( + P, C, W, H, + 
positions.contiguous().data_ptr(), + attrs.contiguous().data_ptr(), + voxel_size, + viewmatrix.contiguous().data_ptr(), + projmatrix.contiguous().data_ptr(), + campos.contiguous().data_ptr(), + tan_fovx, tan_fovy, + out_color.contiguous().data_ptr(), + out_depth.contiguous().data_ptr(), + out_alpha.contiguous().data_ptr() + ); + } + + return std::make_tuple( + out_color, out_depth, out_alpha + ); +} diff --git a/o-voxel/src/serialize/api.cu b/o-voxel/src/serialize/api.cu new file mode 100644 index 0000000000000000000000000000000000000000..b86749b638e016f66a25da82329c17dc85077791 --- /dev/null +++ b/o-voxel/src/serialize/api.cu @@ -0,0 +1,180 @@ +#include +#include "api.h" +#include "z_order.h" +#include "hilbert.h" + + +torch::Tensor +z_order_encode_cpu( + const torch::Tensor& x, + const torch::Tensor& y, + const torch::Tensor& z +) { + // Allocate output tensor + torch::Tensor codes = torch::empty_like(x, torch::dtype(torch::kInt32)); + + // Call CUDA kernel + CPU::z_order_encode( + x.size(0), + reinterpret_cast(x.contiguous().data_ptr()), + reinterpret_cast(y.contiguous().data_ptr()), + reinterpret_cast(z.contiguous().data_ptr()), + reinterpret_cast(codes.data_ptr()) + ); + + return codes; +} + + +std::tuple +z_order_decode_cpu( + const torch::Tensor& codes +) { + // Allocate output tensors + torch::Tensor x = torch::empty_like(codes, torch::dtype(torch::kInt32)); + torch::Tensor y = torch::empty_like(codes, torch::dtype(torch::kInt32)); + torch::Tensor z = torch::empty_like(codes, torch::dtype(torch::kInt32)); + + // Call CUDA kernel + CPU::z_order_decode( + codes.size(0), + reinterpret_cast(codes.contiguous().data_ptr()), + reinterpret_cast(x.data_ptr()), + reinterpret_cast(y.data_ptr()), + reinterpret_cast(z.data_ptr()) + ); + + return std::make_tuple(x, y, z); +} + + +torch::Tensor +hilbert_encode_cpu( + const torch::Tensor& x, + const torch::Tensor& y, + const torch::Tensor& z +) { + // Allocate output tensor + torch::Tensor codes = torch::empty_like(x); + + 
// Call CUDA kernel + CPU::hilbert_encode( + x.size(0), + reinterpret_cast(x.contiguous().data_ptr()), + reinterpret_cast(y.contiguous().data_ptr()), + reinterpret_cast(z.contiguous().data_ptr()), + reinterpret_cast(codes.data_ptr()) + ); + + return codes; +} + + +std::tuple +hilbert_decode_cpu( + const torch::Tensor& codes +) { + // Allocate output tensors + torch::Tensor x = torch::empty_like(codes); + torch::Tensor y = torch::empty_like(codes); + torch::Tensor z = torch::empty_like(codes); + + // Call CUDA kernel + CPU::hilbert_decode( + codes.size(0), + reinterpret_cast(codes.contiguous().data_ptr()), + reinterpret_cast(x.data_ptr()), + reinterpret_cast(y.data_ptr()), + reinterpret_cast(z.data_ptr()) + ); + + return std::make_tuple(x, y, z); +} + + +torch::Tensor +z_order_encode_cuda( + const torch::Tensor& x, + const torch::Tensor& y, + const torch::Tensor& z +) { + // Allocate output tensor + torch::Tensor codes = torch::empty_like(x, torch::dtype(torch::kInt32)); + + // Call CUDA kernel + CUDA::z_order_encode<<<(x.size(0) + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE>>>( + x.size(0), + reinterpret_cast(x.contiguous().data_ptr()), + reinterpret_cast(y.contiguous().data_ptr()), + reinterpret_cast(z.contiguous().data_ptr()), + reinterpret_cast(codes.data_ptr()) + ); + + return codes; +} + + +std::tuple +z_order_decode_cuda( + const torch::Tensor& codes +) { + // Allocate output tensors + torch::Tensor x = torch::empty_like(codes, torch::dtype(torch::kInt32)); + torch::Tensor y = torch::empty_like(codes, torch::dtype(torch::kInt32)); + torch::Tensor z = torch::empty_like(codes, torch::dtype(torch::kInt32)); + + // Call CUDA kernel + CUDA::z_order_decode<<<(codes.size(0) + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE>>>( + codes.size(0), + reinterpret_cast(codes.contiguous().data_ptr()), + reinterpret_cast(x.data_ptr()), + reinterpret_cast(y.data_ptr()), + reinterpret_cast(z.data_ptr()) + ); + + return std::make_tuple(x, y, z); +} + + +torch::Tensor 
+hilbert_encode_cuda( + const torch::Tensor& x, + const torch::Tensor& y, + const torch::Tensor& z +) { + // Allocate output tensor + torch::Tensor codes = torch::empty_like(x); + + // Call CUDA kernel + CUDA::hilbert_encode<<<(x.size(0) + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE>>>( + x.size(0), + reinterpret_cast(x.contiguous().data_ptr()), + reinterpret_cast(y.contiguous().data_ptr()), + reinterpret_cast(z.contiguous().data_ptr()), + reinterpret_cast(codes.data_ptr()) + ); + + return codes; +} + + +std::tuple +hilbert_decode_cuda( + const torch::Tensor& codes +) { + // Allocate output tensors + torch::Tensor x = torch::empty_like(codes); + torch::Tensor y = torch::empty_like(codes); + torch::Tensor z = torch::empty_like(codes); + + // Call CUDA kernel + CUDA::hilbert_decode<<<(codes.size(0) + BLOCK_SIZE - 1) / BLOCK_SIZE, BLOCK_SIZE>>>( + codes.size(0), + reinterpret_cast(codes.contiguous().data_ptr()), + reinterpret_cast(x.data_ptr()), + reinterpret_cast(y.data_ptr()), + reinterpret_cast(z.data_ptr()) + ); + + return std::make_tuple(x, y, z); +} diff --git a/o-voxel/src/serialize/api.h b/o-voxel/src/serialize/api.h new file mode 100644 index 0000000000000000000000000000000000000000..742cb98da2a97894dbdc4fc23918944249526568 --- /dev/null +++ b/o-voxel/src/serialize/api.h @@ -0,0 +1,136 @@ +/* + * Serialize a voxel grid + * + * Copyright (C) 2025, Jianfeng XIANG + * All rights reserved. 
+ * + * Licensed under The MIT License [see LICENSE for details] + * + * Written by Jianfeng XIANG + */ + +#pragma once +#include + + +#define BLOCK_SIZE 256 + + +/** + * Z-order encode 3D points + * + * @param x [N] tensor containing the x coordinates + * @param y [N] tensor containing the y coordinates + * @param z [N] tensor containing the z coordinates + * + * @return [N] tensor containing the z-order encoded values + */ +torch::Tensor +z_order_encode_cuda( + const torch::Tensor& x, + const torch::Tensor& y, + const torch::Tensor& z +); + + +/** + * Z-order decode 3D points + * + * @param codes [N] tensor containing the z-order encoded values + * + * @return 3 tensors [N] containing the x, y, z coordinates + */ +std::tuple +z_order_decode_cuda( + const torch::Tensor& codes +); + + +/** + * Hilbert encode 3D points + * + * @param x [N] tensor containing the x coordinates + * @param y [N] tensor containing the y coordinates + * @param z [N] tensor containing the z coordinates + * + * @return [N] tensor containing the Hilbert encoded values + */ +torch::Tensor +hilbert_encode_cuda( + const torch::Tensor& x, + const torch::Tensor& y, + const torch::Tensor& z +); + + +/** + * Hilbert decode 3D points + * + * @param codes [N] tensor containing the Hilbert encoded values + * + * @return 3 tensors [N] containing the x, y, z coordinates + */ +std::tuple +hilbert_decode_cuda( + const torch::Tensor& codes +); + + +/** + * Z-order encode 3D points + * + * @param x [N] tensor containing the x coordinates + * @param y [N] tensor containing the y coordinates + * @param z [N] tensor containing the z coordinates + * + * @return [N] tensor containing the z-order encoded values + */ +torch::Tensor +z_order_encode_cpu( + const torch::Tensor& x, + const torch::Tensor& y, + const torch::Tensor& z +); + + +/** + * Z-order decode 3D points + * + * @param codes [N] tensor containing the z-order encoded values + * + * @return 3 tensors [N] containing the x, y, z coordinates + */ 
+std::tuple +z_order_decode_cpu( + const torch::Tensor& codes +); + + +/** + * Hilbert encode 3D points + * + * @param x [N] tensor containing the x coordinates + * @param y [N] tensor containing the y coordinates + * @param z [N] tensor containing the z coordinates + * + * @return [N] tensor containing the Hilbert encoded values + */ +torch::Tensor +hilbert_encode_cpu( + const torch::Tensor& x, + const torch::Tensor& y, + const torch::Tensor& z +); + + +/** + * Hilbert decode 3D points + * + * @param codes [N] tensor containing the Hilbert encoded values + * + * @return 3 tensors [N] containing the x, y, z coordinates + */ +std::tuple +hilbert_decode_cpu( + const torch::Tensor& codes +); diff --git a/o-voxel/src/serialize/hilbert.cu b/o-voxel/src/serialize/hilbert.cu new file mode 100644 index 0000000000000000000000000000000000000000..6202a663bacea1538ececee8d5d1fde9b8098a26 --- /dev/null +++ b/o-voxel/src/serialize/hilbert.cu @@ -0,0 +1,230 @@ +#include +#include + +#include +namespace cg = cooperative_groups; + +#include "hilbert.h" + + +// Expands a 10-bit integer into 30 bits by inserting 2 zeros after each bit. +static __host__ __device__ uint32_t expandBits(uint32_t v) +{ + v = (v * 0x00010001u) & 0xFF0000FFu; + v = (v * 0x00000101u) & 0x0F00F00Fu; + v = (v * 0x00000011u) & 0xC30C30C3u; + v = (v * 0x00000005u) & 0x49249249u; + return v; +} + + +// Removes 2 zeros after each bit in a 30-bit integer. 
+static __host__ __device__ uint32_t extractBits(uint32_t v) +{ + v = v & 0x49249249; + v = (v ^ (v >> 2)) & 0x030C30C3u; + v = (v ^ (v >> 4)) & 0x0300F00Fu; + v = (v ^ (v >> 8)) & 0x030000FFu; + v = (v ^ (v >> 16)) & 0x000003FFu; + return v; +} + + +__host__ void CPU::hilbert_encode( + size_t N, + const uint32_t* x, + const uint32_t* y, + const uint32_t* z, + uint32_t* codes +) { + for (size_t thread_id = 0; thread_id < N; thread_id++) { + uint32_t point[3] = {x[thread_id], y[thread_id], z[thread_id]}; + + uint32_t m = 1 << 9, q, p, t; + + // Inverse undo excess work + q = m; + while (q > 1) { + p = q - 1; + for (int i = 0; i < 3; i++) { + if (point[i] & q) { + point[0] ^= p; // invert + } else { + t = (point[0] ^ point[i]) & p; + point[0] ^= t; + point[i] ^= t; + } + } + q >>= 1; + } + + // Gray encode + for (int i = 1; i < 3; i++) { + point[i] ^= point[i - 1]; + } + t = 0; + q = m; + while (q > 1) { + if (point[2] & q) { + t ^= q - 1; + } + q >>= 1; + } + for (int i = 0; i < 3; i++) { + point[i] ^= t; + } + + // Convert to 3D Hilbert code + uint32_t xx = expandBits(point[0]); + uint32_t yy = expandBits(point[1]); + uint32_t zz = expandBits(point[2]); + + codes[thread_id] = xx * 4 + yy * 2 + zz; + } +} + + +__host__ void CPU::hilbert_decode( + size_t N, + const uint32_t* codes, + uint32_t* x, + uint32_t* y, + uint32_t* z +) { + for (size_t thread_id = 0; thread_id < N; thread_id++) { + uint32_t point[3]; + point[0] = extractBits(codes[thread_id] >> 2); + point[1] = extractBits(codes[thread_id] >> 1); + point[2] = extractBits(codes[thread_id]); + + uint32_t m = 2 << 9, q, p, t; + + // Gray decode by H ^ (H/2) + t = point[2] >> 1; + for (int i = 2; i > 0; i--) { + point[i] ^= point[i - 1]; + } + point[0] ^= t; + + // Undo excess work + q = 2; + while (q != m) { + p = q - 1; + for (int i = 2; i >= 0; i--) { + if (point[i] & q) { + point[0] ^= p; + } else { + t = (point[0] ^ point[i]) & p; + point[0] ^= t; + point[i] ^= t; + } + } + q <<= 1; + } + + x[thread_id] = 
point[0]; + y[thread_id] = point[1]; + z[thread_id] = point[2]; + } +} + + +__global__ void CUDA::hilbert_encode( + size_t N, + const uint32_t* x, + const uint32_t* y, + const uint32_t* z, + uint32_t* codes +) { + size_t thread_id = cg::this_grid().thread_rank(); + if (thread_id >= N) return; + + uint32_t point[3] = {x[thread_id], y[thread_id], z[thread_id]}; + + uint32_t m = 1 << 9, q, p, t; + + // Inverse undo excess work + q = m; + while (q > 1) { + p = q - 1; + for (int i = 0; i < 3; i++) { + if (point[i] & q) { + point[0] ^= p; // invert + } else { + t = (point[0] ^ point[i]) & p; + point[0] ^= t; + point[i] ^= t; + } + } + q >>= 1; + } + + // Gray encode + for (int i = 1; i < 3; i++) { + point[i] ^= point[i - 1]; + } + t = 0; + q = m; + while (q > 1) { + if (point[2] & q) { + t ^= q - 1; + } + q >>= 1; + } + for (int i = 0; i < 3; i++) { + point[i] ^= t; + } + + // Convert to 3D Hilbert code + uint32_t xx = expandBits(point[0]); + uint32_t yy = expandBits(point[1]); + uint32_t zz = expandBits(point[2]); + + codes[thread_id] = xx * 4 + yy * 2 + zz; +} + + +__global__ void CUDA::hilbert_decode( + size_t N, + const uint32_t* codes, + uint32_t* x, + uint32_t* y, + uint32_t* z +) { + size_t thread_id = cg::this_grid().thread_rank(); + if (thread_id >= N) return; + + uint32_t point[3]; + point[0] = extractBits(codes[thread_id] >> 2); + point[1] = extractBits(codes[thread_id] >> 1); + point[2] = extractBits(codes[thread_id]); + + uint32_t m = 2 << 9, q, p, t; + + // Gray decode by H ^ (H/2) + t = point[2] >> 1; + for (int i = 2; i > 0; i--) { + point[i] ^= point[i - 1]; + } + point[0] ^= t; + + // Undo excess work + q = 2; + while (q != m) { + p = q - 1; + for (int i = 2; i >= 0; i--) { + if (point[i] & q) { + point[0] ^= p; + } else { + t = (point[0] ^ point[i]) & p; + point[0] ^= t; + point[i] ^= t; + } + } + q <<= 1; + } + + x[thread_id] = point[0]; + y[thread_id] = point[1]; + z[thread_id] = point[2]; +} diff --git a/o-voxel/src/serialize/hilbert.h 
b/o-voxel/src/serialize/hilbert.h new file mode 100644 index 0000000000000000000000000000000000000000..c2026c4149418fa3a65461ec4ef0cc7cb1d83d30 --- /dev/null +++ b/o-voxel/src/serialize/hilbert.h @@ -0,0 +1,74 @@ +#pragma once + +namespace CUDA { +/** + * Hilbert encode 3D points + * + * @param x [N] tensor containing the x coordinates + * @param y [N] tensor containing the y coordinates + * @param z [N] tensor containing the z coordinates + * + * @return [N] tensor containing the z-order encoded values + */ +__global__ void hilbert_encode( + size_t N, + const uint32_t* x, + const uint32_t* y, + const uint32_t* z, + uint32_t* codes +); + + +/** + * Hilbert decode 3D points + * + * @param codes [N] tensor containing the z-order encoded values + * @param x [N] tensor containing the x coordinates + * @param y [N] tensor containing the y coordinates + * @param z [N] tensor containing the z coordinates + */ +__global__ void hilbert_decode( + size_t N, + const uint32_t* codes, + uint32_t* x, + uint32_t* y, + uint32_t* z +); +} // namespace CUDA + + +namespace CPU { +/** + * Hilbert encode 3D points + * + * @param x [N] tensor containing the x coordinates + * @param y [N] tensor containing the y coordinates + * @param z [N] tensor containing the z coordinates + * + * @return [N] tensor containing the z-order encoded values + */ +__host__ void hilbert_encode( + size_t N, + const uint32_t* x, + const uint32_t* y, + const uint32_t* z, + uint32_t* codes +); + + +/** + * Hilbert decode 3D points + * + * @param codes [N] tensor containing the z-order encoded values + * @param x [N] tensor containing the x coordinates + * @param y [N] tensor containing the y coordinates + * @param z [N] tensor containing the z coordinates + */ +__host__ void hilbert_decode( + size_t N, + const uint32_t* codes, + uint32_t* x, + uint32_t* y, + uint32_t* z +); +} // namespace CPU diff --git a/o-voxel/src/serialize/z_order.cu b/o-voxel/src/serialize/z_order.cu new file mode 100644 index 
0000000000000000000000000000000000000000..8bed8e087450ef838bdf311f33dc6a4623787c08 --- /dev/null +++ b/o-voxel/src/serialize/z_order.cu @@ -0,0 +1,97 @@ +#include +#include + +#include +namespace cg = cooperative_groups; + +#include "z_order.h" + + +// Expands a 10-bit integer into 30 bits by inserting 2 zeros after each bit. +static __host__ __device__ uint32_t expandBits(uint32_t v) +{ + v = (v * 0x00010001u) & 0xFF0000FFu; + v = (v * 0x00000101u) & 0x0F00F00Fu; + v = (v * 0x00000011u) & 0xC30C30C3u; + v = (v * 0x00000005u) & 0x49249249u; + return v; +} + + +// Removes 2 zeros after each bit in a 30-bit integer. +static __host__ __device__ uint32_t extractBits(uint32_t v) +{ + v = v & 0x49249249; + v = (v ^ (v >> 2)) & 0x030C30C3u; + v = (v ^ (v >> 4)) & 0x0300F00Fu; + v = (v ^ (v >> 8)) & 0x030000FFu; + v = (v ^ (v >> 16)) & 0x000003FFu; + return v; +} + + +__host__ void CPU::z_order_encode( + size_t N, + const uint32_t* x, + const uint32_t* y, + const uint32_t* z, + uint32_t* codes +) { + for (size_t thread_id = 0; thread_id < N; thread_id++) { + uint32_t xx = expandBits(x[thread_id]); + uint32_t yy = expandBits(y[thread_id]); + uint32_t zz = expandBits(z[thread_id]); + + codes[thread_id] = xx * 4 + yy * 2 + zz; + } +} + + +__host__ void CPU::z_order_decode( + size_t N, + const uint32_t* codes, + uint32_t* x, + uint32_t* y, + uint32_t* z +) { + for (size_t thread_id = 0; thread_id < N; thread_id++) { + x[thread_id] = extractBits(codes[thread_id] >> 2); + y[thread_id] = extractBits(codes[thread_id] >> 1); + z[thread_id] = extractBits(codes[thread_id]); + } +} + + + +__global__ void CUDA::z_order_encode( + size_t N, + const uint32_t* x, + const uint32_t* y, + const uint32_t* z, + uint32_t* codes +) { + size_t thread_id = cg::this_grid().thread_rank(); + if (thread_id >= N) return; + + uint32_t xx = expandBits(x[thread_id]); + uint32_t yy = expandBits(y[thread_id]); + uint32_t zz = expandBits(z[thread_id]); + + codes[thread_id] = xx * 4 + yy * 2 + zz; +} + + 
+__global__ void CUDA::z_order_decode( + size_t N, + const uint32_t* codes, + uint32_t* x, + uint32_t* y, + uint32_t* z +) { + size_t thread_id = cg::this_grid().thread_rank(); + if (thread_id >= N) return; + + x[thread_id] = extractBits(codes[thread_id] >> 2); + y[thread_id] = extractBits(codes[thread_id] >> 1); + z[thread_id] = extractBits(codes[thread_id]); +} diff --git a/o-voxel/src/serialize/z_order.h b/o-voxel/src/serialize/z_order.h new file mode 100644 index 0000000000000000000000000000000000000000..201461a156d9474ee7bb2edf4d3245a1d4b4d34d --- /dev/null +++ b/o-voxel/src/serialize/z_order.h @@ -0,0 +1,74 @@ +#pragma once + +namespace CUDA { +/** + * Z-order encode 3D points + * + * @param x [N] tensor containing the x coordinates + * @param y [N] tensor containing the y coordinates + * @param z [N] tensor containing the z coordinates + * + * @return [N] tensor containing the z-order encoded values + */ +__global__ void z_order_encode( + size_t N, + const uint32_t* x, + const uint32_t* y, + const uint32_t* z, + uint32_t* codes +); + + +/** + * Z-order decode 3D points + * + * @param codes [N] tensor containing the z-order encoded values + * @param x [N] tensor containing the x coordinates + * @param y [N] tensor containing the y coordinates + * @param z [N] tensor containing the z coordinates + */ +__global__ void z_order_decode( + size_t N, + const uint32_t* codes, + uint32_t* x, + uint32_t* y, + uint32_t* z +); +} // namespace CUDA + + +namespace CPU { +/** + * Z-order encode 3D points + * + * @param x [N] tensor containing the x coordinates + * @param y [N] tensor containing the y coordinates + * @param z [N] tensor containing the z coordinates + * + * @return [N] tensor containing the z-order encoded values + */ +__host__ void z_order_encode( + size_t N, + const uint32_t* x, + const uint32_t* y, + const uint32_t* z, + uint32_t* codes +); + + +/** + * Z-order decode 3D points + * + * @param codes [N] tensor containing the z-order encoded values + * 
@param x [N] tensor containing the x coordinates + * @param y [N] tensor containing the y coordinates + * @param z [N] tensor containing the z coordinates + */ +__host__ void z_order_decode( + size_t N, + const uint32_t* codes, + uint32_t* x, + uint32_t* y, + uint32_t* z +); +} // namespace CPU diff --git a/o-voxel/third_party/eigen/.clang-format b/o-voxel/third_party/eigen/.clang-format new file mode 100644 index 0000000000000000000000000000000000000000..b9d949c163359825e5f4fe40ea8f6c7cbf6b8a46 --- /dev/null +++ b/o-voxel/third_party/eigen/.clang-format @@ -0,0 +1,19 @@ +--- +BasedOnStyle: Google +ColumnLimit: 120 +--- +Language: Cpp +BasedOnStyle: Google +ColumnLimit: 120 +StatementMacros: + - EIGEN_STATIC_ASSERT + - EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED + - EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN +SortIncludes: false +AttributeMacros: +- EIGEN_STRONG_INLINE +- EIGEN_ALWAYS_INLINE +- EIGEN_DEVICE_FUNC +- EIGEN_DONT_INLINE +- EIGEN_DEPRECATED +- EIGEN_UNUSED diff --git a/o-voxel/third_party/eigen/.git-blame-ignore-revs b/o-voxel/third_party/eigen/.git-blame-ignore-revs new file mode 100644 index 0000000000000000000000000000000000000000..8afa5224f9f3466614ab19f7a8a1a4fffa053bd9 --- /dev/null +++ b/o-voxel/third_party/eigen/.git-blame-ignore-revs @@ -0,0 +1,4 @@ +# First major clang-format MR (https://gitlab.com/libeigen/eigen/-/merge_requests/1429). +f38e16c193d489c278c189bc06b448a94adb45fb +# Formatting of tests, examples, benchmarks, et cetera (https://gitlab.com/libeigen/eigen/-/merge_requests/1432). 
+46e9cdb7fea25d7f7aef4332b9c3ead3857e213d diff --git a/o-voxel/third_party/eigen/.gitattributes b/o-voxel/third_party/eigen/.gitattributes new file mode 100644 index 0000000000000000000000000000000000000000..78e137e864070eaf8c96a3b645906fa5bc35e358 --- /dev/null +++ b/o-voxel/third_party/eigen/.gitattributes @@ -0,0 +1,3 @@ +*.sh eol=lf +debug/msvc/*.dat eol=crlf +debug/msvc/*.natvis eol=crlf diff --git a/o-voxel/third_party/eigen/.gitignore b/o-voxel/third_party/eigen/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..d7367ce3b67a4a6b1540cbe5dad8802c8f5c280d --- /dev/null +++ b/o-voxel/third_party/eigen/.gitignore @@ -0,0 +1,41 @@ +qrc_*cxx +*.orig +*.pyc +*.diff +diff +*.save +save +*.old +*.gmo +*.qm +core +core.* +*.bak +*~ +*.build* +*.moc.* +*.moc +ui_* +CMakeCache.txt +tags +.*.swp +activity.png +*.out +*.php* +*.log +*.orig +*.rej +log +patch +*.patch +a +a.* +lapack/testing +lapack/reference +.*project +.settings +Makefile +!ci/build.gitlab-ci.yml +!scripts/buildtests.in +!Eigen/Core +!Eigen/src/Core diff --git a/o-voxel/third_party/eigen/.gitlab-ci.yml b/o-voxel/third_party/eigen/.gitlab-ci.yml new file mode 100644 index 0000000000000000000000000000000000000000..4cd268164cbb6314ee39503be13a3de361958fd6 --- /dev/null +++ b/o-voxel/third_party/eigen/.gitlab-ci.yml @@ -0,0 +1,38 @@ +# This file is part of Eigen, a lightweight C++ template library +# for linear algebra. +# +# Copyright (C) 2023, The Eigen Authors +# +# This Source Code Form is subject to the terms of the Mozilla +# Public License v. 2.0. If a copy of the MPL was not distributed +# with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +default: +# automatically cancels a job when a new pipeline for the same branch is triggered + interruptible: true + +stages: + - checkformat + - build + - test + - deploy + +variables: + # CMake build directory. + EIGEN_CI_BUILDDIR: .build + # Specify the CMake build target. 
+ EIGEN_CI_BUILD_TARGET: "" + # If a test regex is specified, that will be selected. + # Otherwise, we will try a label if specified. + EIGEN_CI_CTEST_REGEX: "" + EIGEN_CI_CTEST_LABEL: "" + EIGEN_CI_CTEST_ARGS: "" + +include: + - "/ci/checkformat.gitlab-ci.yml" + - "/ci/common.gitlab-ci.yml" + - "/ci/build.linux.gitlab-ci.yml" + - "/ci/build.windows.gitlab-ci.yml" + - "/ci/test.linux.gitlab-ci.yml" + - "/ci/test.windows.gitlab-ci.yml" + - "/ci/deploy.gitlab-ci.yml" diff --git a/o-voxel/third_party/eigen/.gitlab/issue_templates/Bug Report.md b/o-voxel/third_party/eigen/.gitlab/issue_templates/Bug Report.md new file mode 100644 index 0000000000000000000000000000000000000000..cb8bc2569c0ee97a7daf80f22c0c40e61b27b7e5 --- /dev/null +++ b/o-voxel/third_party/eigen/.gitlab/issue_templates/Bug Report.md @@ -0,0 +1,59 @@ + + +### Summary + + +### Environment + +- **Operating System** : Windows/Linux +- **Architecture** : x64/Arm64/PowerPC ... +- **Eigen Version** : 5.0.0 +- **Compiler Version** : gcc-12.0 +- **Compile Flags** : -O3 -march=native +- **Vector Extension** : SSE/AVX/NEON ... + +### Minimal Example + + +```cpp +// Insert your code here. +``` + +### Steps to reproduce the issue + + +1. first step +2. second step +3. ... + +### What is the current *bug* behavior? + + +### What is the expected *correct* behavior? + + +### Relevant logs + + +### [Optional] Benchmark scripts and results + + +### Anything else that might help + diff --git a/o-voxel/third_party/eigen/.gitlab/issue_templates/Feature Request.md b/o-voxel/third_party/eigen/.gitlab/issue_templates/Feature Request.md new file mode 100644 index 0000000000000000000000000000000000000000..b0618b2d7fed4f920ed4bb624016b41c95928672 --- /dev/null +++ b/o-voxel/third_party/eigen/.gitlab/issue_templates/Feature Request.md @@ -0,0 +1,14 @@ + + +### Describe the feature you would like to be implemented. + +### Why Would such a feature be useful for other users? 
+ +### Any hints on how to implement the requested feature? + +### Additional resources diff --git a/o-voxel/third_party/eigen/.gitlab/merge_request_templates/Default.md b/o-voxel/third_party/eigen/.gitlab/merge_request_templates/Default.md new file mode 100644 index 0000000000000000000000000000000000000000..c26e4757456f8504601de27418044daac6792ed7 --- /dev/null +++ b/o-voxel/third_party/eigen/.gitlab/merge_request_templates/Default.md @@ -0,0 +1,30 @@ + + +### Description + + +%{first_multiline_commit} + +### Reference issue + + +### Additional information + diff --git a/o-voxel/third_party/eigen/CHANGELOG.md b/o-voxel/third_party/eigen/CHANGELOG.md new file mode 100644 index 0000000000000000000000000000000000000000..a24f5822f9b2624851ad220e687422824faa32e6 --- /dev/null +++ b/o-voxel/third_party/eigen/CHANGELOG.md @@ -0,0 +1,1935 @@ +# Changelog + +## [Unreleased] + +New features: +- ComplexQZ implementation [!1962] +- Generic clang vector extension backend [!2051] + +## [5.0.1] - 2025-11-11 + +A few bug-fixes from the master branch, including +- Dirty git state [#2995] +- Failing geo_homogeneous tests [#2977] +- Alignment issues [#2982, #2984] +- Missing C++20 `` header [#2986] +- BLAS/LAPACK build on windows [#2980] + +See the full lists of [addressed bugs](https://gitlab.com/libeigen/eigen/-/issues?state=all&label_name%5B%5D=release%3A%3A5.0.1) and [merge requests](https://gitlab.com/libeigen/eigen/-/merge_requests?state=all&label_name%5B%5D=release%3A%3A5.0.1) for more details. + +## [5.0.0] - 2025-09-30 + +Eigen 5.0 provides many new features, performance enhancements, and bugfixes throughout Eigen’s core template expression infrastructure and linear algebra facilities. The full set of changes and related issues are too large to list here, but can be accessed via the release milestone %"5.0". + +This is the last major release to support the C++14 language standard. The master branch and subsequent releases will require support for C++17. 
+ +### Versioning + +This release marks a transition to [Semantic Versioning](https://semver.org/). Previously, Eigen used a WORLD.MAJOR.MINOR scheme. From now on, version numbers will follow the MAJOR.MINOR.PATCH format, indicating breaking changes, new features, and bug fixes, respectively. The WORLD version will remain 3 for this and subsequent releases for posterity. See the table below: +``` +╔═════════╦═════╦═════╗ +║ Release ║ 3.4 ║ 5.0 ║ +╠═════════╬═════╬═════╣ +║ WORLD   ║ 3   ║ 3   ║ +║ MAJOR   ║ 4   ║ 5   ║ +║ MINOR   ║ 0   ║ 0   ║ +║ PATCH   ║ -   ║ 0   ║ +╚═════════╩═════╩═════╝ +``` + +### Breaking changes + +* Eigen 5.X.X requires C++14. When building with GNU-compatible compilers, set `-std=c++14` or later. As part of this change, some macros such as `EIGEN_HAS_CXX11` have also been removed. +* The CMake build system has been modernized and older properties have been removed - projects relying on CMake may need to update their configurations [!485]. +* All LGPL-licensed code has been removed (i.e. Constrained Conjugate Gradient) [!1197]. These were "unsupported" anyways, and weren't widely used. +* Due to name conflicts with other projects, `Eigen::all` and `Eigen::last` have been moved to `Eigen::placeholders::all` and `Eigen::placeholders::last` [!649]. +* Any direct inclusion of an internal header (i.e. under a `../src/..` path) will result in a compilation error [!631]. +* Runtime SVD options for computing thin/full U/V have been deprecated: use compile-time options instead [!826]. +* Scalar (i.e. non-vectorized) comparisons now return masks with values of `Scalar(1)` rather than having all bits set to avoid undefined behavior [!1862]. +* BLAS return types have been changed for Eigen BLAS to `void` instead of `int` for compatibility with other BLAS implementations [!1497]. 
+* `Eigen::aligned_allocator` no longer inherits from `std::allocator` due to a change in the standard and the use of `allocate_at_least` [!1795]. +* Euler angles are now returned in a more canonical form, potentially resulting in a change of behavior [!1301, !1314]. +* Eigen's random number generation has changed, resulting in a change of behavior. Please do not rely on specific random numbers from Eigen - these were never guaranteed to be consistent across Eigen versions, nor are they generally consistent across platforms [!1437]. + +## [3.4.1] - 2025-09-30 + +Many bug fixes have been backported from the main branch. + +A list of new issues addressed can be found via the [3.4.1](https://gitlab.com/libeigen/eigen/-/issues?state=all&label_name%5B%5D=3.4.1) label on GitLab. + +Check the [git commit history](https://gitlab.com/libeigen/eigen/-/commits/3.4.1) for the full list of changes. + +## [3.4.0] - 2021-08-18 + +**Notice:** 3.4.x will be the last major release series of Eigen that will support c++03. + +### Breaking changes + +* Using float or double for indexing matrices, vectors and arrays will now fail to compile +* **Behavioral change:** `Transform::computeRotationScaling()` and `Transform::computeScalingRotation()` are now more continuous across degeneracies (see !349). + +### New features + +* Add c++11 **`initializer_list` constructors** to Matrix and Array [\[doc\]](http://eigen.tuxfamily.org/dox-devel/group__TutorialMatrixClass.html#title3) +* Add STL-compatible **iterators** for dense expressions [\[doc\]](http://eigen.tuxfamily.org/dox-devel/group__TutorialSTL.html). +* New versatile API for sub-matrices, **slices**, and **indexed views** [\[doc\]](http://eigen.tuxfamily.org/dox-devel/group__TutorialSlicingIndexing.html). +* Add C++11 **template aliases** for Matrix, Vector, and Array of common sizes, including generic `Vector` and `RowVector` aliases [\[doc\]](http://eigen.tuxfamily.org/dox-devel/group__matrixtypedefs.html). 
+* New support for `bfloat16`. + +### New backends + +* **Arm SVE:** fixed-length [Scalable Vector Extensions](https://developer.arm.com/Architectures/Scalable%20Vector%20Extensions) vectors for `uint32_t` and `float` are available. +* **MIPS MSA:** [MIPS SIMD Architecture (MSA)](https://www.mips.com/products/architectures/ase/simd/) +* **AMD ROCm/HIP:** generic GPU backend that unifies support for [NVIDIA/CUDA](https://developer.nvidia.com/cuda-toolkit) and [AMD/HIP](https://rocmdocs.amd.com/en/latest/). +* **Power 10 MMA:** initial support for [Power 10 matrix multiplication assist instructions](https://arxiv.org/pdf/2104.03142.pdf) for float32 and float64, real and complex. + +### Improvements + +* Eigen now uses the c++11 **alignas** keyword for static alignment. Users targeting C++17 only and recent compilers (e.g., GCC>=7, clang>=5, MSVC>=19.12) will thus be able to completely forget about all [issues](http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html) related to static alignment, including `EIGEN_MAKE_ALIGNED_OPERATOR_NEW`. +* Various performance improvements for products and Eigen's GEBP and GEMV kernels have been implemented: + * By using half and quarter-packets the performance of matrix multiplications of small to medium sized matrices has been improved + * Eigen's GEMM now falls back to GEMV if it detects that a matrix is a run-time vector + * The performance of matrix products using Arm Neon has been drastically improved (up to 20%) + * Performance of many special cases of matrix products has been improved +* Large speed up from blocked algorithm for `transposeInPlace`. +* Speed up misc. operations by propagating compile-time sizes (col/row-wise reverse, PartialPivLU, and others) +* Faster specialized SIMD kernels for small fixed-size inverse, LU decomposition, and determinant. +* Improved or added vectorization of partial or slice reductions along the outer-dimension, for instance: `colmajor_mat.rowwise().mean()`. 
+ +### Elementwise math functions + +* Many functions are now implemented and vectorized in generic (backend-agnostic) form. +* Many improvements to correctness, accuracy, and compatibility with c++ standard library. + * Much improved implementation of `ldexp`. + * Misc. fixes for corner cases, NaN/Inf inputs and singular points of many functions. + * New implementation of the Payne-Hanek for argument reduction algorithm for `sin` and `cos` with huge arguments. + * New faithfully rounded algorithm for `pow(x,y)`. +* Speedups from (new or improved) vectorized versions of `pow`, `log`, `sin`, `cos`, `arg`, `pow`, `log2`, complex `sqrt`, `erf`, `expm1`, `logp1`, `logistic`, `rint`, `gamma` and `bessel` functions, and more. +* Improved special function support (Bessel and gamma functions, `ndtri`, `erfc`, inverse hyperbolic functions and more) +* New elementwise functions for `absolute_difference`, `rint`. + +### Dense matrix decompositions and solvers + +* All dense linear solvers (i.e., Cholesky, *LU, *QR, CompleteOrthogonalDecomposition, *SVD) now inherit SolverBase and thus support `.transpose()`, `.adjoint()` and `.solve()` APIs. +* SVD implementations now have an `info()` method for checking convergence. +* Most decompositions now fail quickly when invalid inputs are detected. +* Optimized the product of a `HouseholderSequence` with the identity, as well as the evaluation of a `HouseholderSequence` to a dense matrix using faster blocked product. +* Fixed aliasing issues with in-place small matrix inversions. +* Fixed several edge-cases with empty or zero inputs. + +### Sparse matrix support, decompositions and solvers + +* Enabled assignment and addition with diagonal matrix expressions. +* Support added for SuiteSparse KLU routines via the `KLUSupport` module. SuiteSparse must be installed to use this module. +* `SparseCholesky` now works with row-major matrices. +* Various bug fixes and performance improvements. 
+ +### Type support + +* Improved support for `half` + * Native support added for ARM `__fp16`, CUDA/HIP `__half`, and `F16C` conversion intrinsics. + * Better vectorization support added across all backends. +* Improved bool support + * Partial vectorization support added for boolean operations. + * Significantly improved performance (x25) for logical operations with `Matrix` or `Tensor` of `bool`. +* Improved support for custom types + * More custom types work out-of-the-box (see #2201). + +### Backend-specific improvements + +* **Arm NEON** + * Now provides vectorization for `uint64_t`, `int64_t`, `uint32_t`, `int16_t`, `uint16_t`, `int16_t`, `int8_t`, and `uint8_t` + * Emulates `bfloat16` support when using `Eigen::bfloat16` + * Supports emulated and native `float16` when using `Eigen::half` +* **SSE/AVX/AVX512** + * General performance improvements and bugfixes. + * Enabled AVX512 instructions by default if available. + * New `std::complex`, `half`, and `bfloat16` vectorization support added. + * Many missing packet functions added. +* **Altivec/Power** + * General performance improvement and bugfixes. + * Enhanced vectorization of real and complex scalars. + * Changes to the `gebp_kernel` specific to Altivec, using VSX implementation of the MMA instructions that gain speed improvements up to 4x for matrix-matrix products. + * Dynamic dispatch for GCC greater than 10 enabling selection of MMA or VSX instructions based on `__builtin_cpu_supports`. +* **GPU (CUDA and HIP)** + * Several optimized math functions added, better support for `std::complex`. + * Added option to disable CUDA entirely by defining `EIGEN_NO_CUDA`. + * Many more functions can now be used in device code (e.g. comparisons, small matrix inversion). +* **ZVector** + * Vectorized `float` and `std::complex` support added. + * Added z14 support. 
+* **SYCL** + * Redesigned SYCL implementation for use with the [Tensor](https://eigen.tuxfamily.org/dox/unsupported/eigen_tensors.html) module, which can be enabled by defining `EIGEN_USE_SYCL`. + * New generic memory model introduced, used by `TensorDeviceSycl`. + * Better integration with OpenCL devices. + * Added many math function specializations. + +### Miscellaneous API Changes + +* New `setConstant(...)` methods for preserving one dimension of a matrix by passing in `NoChange`. +* Added `setUnit(Index i)` for vectors that sets the *i*-th coefficient to one and all others to zero. +* Added `transpose()`, `adjoint()`, `conjugate()` methods to `SelfAdjointView`. +* Added `shiftLeft()` and `shiftRight()` coefficient-wise arithmetic shift functions to Arrays. +* Enabled adding and subtracting of diagonal expressions. +* Allow user-defined default cache sizes via defining `EIGEN_DEFAULT_L1_CACHE_SIZE`, ..., `EIGEN_DEFAULT_L3_CACHE_SIZE`. +* Added `EIGEN_ALIGNOF(X)` macro for determining alignment of a provided variable. +* Allow plugins for `VectorwiseOp` by defining a file `EIGEN_VECTORWISEOP_PLUGIN` (e.g. `-DEIGEN_VECTORWISEOP_PLUGIN=my_vectorwise_op_plugins.h`). +* Allow disabling of IO operations by defining `EIGEN_NO_IO`. + +### Improvement to NaN propagation + +* Improvements to NaN correctness for elementwise functions. +* New `NaNPropagation` template argument to control whether NaNs are propagated or suppressed in elementwise `min/max` and corresponding reductions on `Array`, `Matrix`, and `Tensor`. + +### New low-latency non-blocking ThreadPool module +* Originally a part of the Tensor module, `Eigen::ThreadPool` is now separate and more portable, and forms the basis for multi-threading in TensorFlow, for example. + +### Changes to Tensor module + +* Support for c++03 was officially dropped in Tensor module, since most of the code was written in c++11 anyway. This will prevent building the code for CUDA with older version of `nvcc`. 
+* Performance optimizations of Tensor contraction + * Speed up "outer-product-like" operations by parallelizing over the contraction dimension, using thread_local buffers and recursive work splitting. + * Improved threading heuristics. + * Support for fusing element-wise operations into contraction during evaluation. +* Performance optimizations of other Tensor operators + * Speedups from improved vectorization, block evaluation, and multi-threading for most operators. + * Significant speedup to broadcasting. + * Reduction of index computation overhead, e.g. using fast divisors in TensorGenerator, squeezing dimensions in TensorPadding. +* Complete rewrite of the block (tiling) evaluation framework for tensor expressions led to significant speedups and reduced number of memory allocations. +* Added new API for asynchronous evaluation of tensor expressions. +* Misc. minor behavior changes & fixes: + * Fix const correctness for TensorMap. + * Modify tensor argmin/argmax to always return first occurrence. + * More numerically stable tree reduction. + * Improve randomness of the tensor random generator. + * Update the padding computation for `PADDING_SAME` to be consistent with TensorFlow. + * Support static dimensions (aka IndexList) in resizing/reshape/broadcast. + * Improved accuracy of Tensor FFT. + +### Changes to sparse iterative solvers +* Added new IDRS iterative linear solver. + +### Other relevant changes + +* Eigen now provides an option to test with an external BLAS library + +See the [announcement](https://www.eigen.tuxfamily.org/index.php?title=3.4) for more details. + +## [3.3.9] - 2020-12-04 + +Changes since 3.3.8: + +* Commit 4e5385c90: Introduce rendering Doxygen math formulas with MathJax and the option `EIGEN_DOC_USE_MATHJAX` to control this. +* #1746: Removed implementation of standard copy-constructor and standard copy-assign-operator from PermutationMatrix and Transpositions to allow malloc-less `std::move`. 
+* #2036: Make sure the find_standard_math_library_test_program compiles and doesn't optimize away functions we try to test for. +* #2046: Rename test/array.cpp to test/array_cwise.cpp to fix an issue with the C++ standard library header "array" +* #2040: Fix an issue in test/ctorleak that occurred when disabling exceptions. +* #2011: Remove error counting in OpenMP parallel section in Eigen's GEMM parallelizing logic. +* #2012: Define coeff-wise binary array operators for base class to fix an issue when using Eigen with C++20 +* Commit bfdd4a990: Fix an issue with Intel® MKL PARDISO support. + +## [3.3.8] - 2020-10-05 + +Changes since 3.3.7: + +* General bug fixes + * #1995: Fix a failure in the GEBP kernel when using small L1 cache sizes, OpenMP and FMA. + * #1990: Make CMake accept installation paths relative to `CMAKE_INSTALL_PREFIX`. + * #1974: Fix issue when reserving an empty sparse matrix + * #1823: Fix incorrect use of `std::abs` + * #1788: Fix rule-of-three violations inside the stable modules. This fixes deprecated-copy warnings when compiling with GCC>=9. Also protect some additional Base-constructors from getting called by user code (#1587) + * #1796: Make matrix squareroot usable for Map and Ref types + * #1281: Fix AutoDiffScalar's make_coherent for nested expression of constant ADs. + * #1761: Fall back `is_integral` to `std::is_integral` in c++11 and fix `internal::is_integral` with MSVC 2013 and older. 
+ * #1741: Fix `self_adjoint*matrix`, `triangular*matrix`, and `triangular^1*matrix` with a destination having a non-trivial inner-stride + * #1741: Fix SelfAdjointView::rankUpdate and product to triangular part for destination with non-trivial inner stride + * #1741: Fix `C.noalias() = A*C;` with `C.innerStride()!=1` + * #1695: Fix a numerical robustness issue in BDCSVD + * #1692: Enable enum as sizes of Matrix and Array + * #1689: Fix used-but-marked-unused warning + * #1679: Avoid possible division by 0 in complex-schur + * #1676: Fix C++17 template deduction in DenseBase + * #1669: Fix PartialPivLU/inverse with zero-sized matrices. + * #1557: Fix RealSchur and EigenSolver for matrices with only zeros on the diagonal. +* Performance related fixes + * #1562: Optimize evaluation of small products of the form s*A*B by rewriting them as: s*(A.lazyProduct(B)) to save a costly temporary. Measured speedup from 2x to 5x... + * Commit 165db26dc and 8ee2e10af: Fix performance issue with SimplicialLDLT for complexes coefficients +* Misc commits + * Commit 5f1082d0b: Fix `QuaternionBase::cast` for quaternion map and wrapper. + * Commit a153dbae9: Fix case issue with Lapack unit tests. + * Commit 3d7e2a1f3: Fix possible conflict with an externally defined "real" type when using gcc-5. + * Commit 1760432f6: Provide `numext::[u]int{32,64}_t`. + * Commit 3d18879fc: Initialize isometric transforms like affine transforms. + * Commit 160c0a340: Change typedefs from private to protected to fix MSVC compilation. + * Commit 3cf273591: Fix compilation of FFTW unit test. + * Commit 6abc9e537: Fix compilation of BLAS backend and frontend. + * Commit 47e2f8a42: Fix real/imag namespace conflict. + * Commit 71d0402e3: Avoid throwing in destructors. + * Commit 0dd9643ad: Fix precision issue in `SelfAdjointEigenSolver.h` + * Commit 6ed74ac97: Make `digits10()` return an integer. + * Commit 841d844f9: Use pade for matrix exponential also for complex values. 
+ * Commit 4387298e8: Cast Index to RealScalar in SVDBase to fix an issue when RealScalar is not implicitly convertible to Index. + * Commit fe8cd812b: Provide `EIGEN_HAS_C99_MATH` when using MSVC. + * Commit 7c4208450: Various fixes in polynomial solver and its unit tests. + * Commit e777674a8 and 4415d4e2d: Extend polynomial solver unit tests to complexes. + * Commit 222ce4b49: Automatically switch between EigenSolver and ComplexEigenSolver, and fix a few Real versus Scalar issues. + * Commit 7b93328ba: Enable construction of `Ref` from a runtime vector. + * Commit c28ba89fe: Fix a problem of old gcc versions having problems with recursive #pragma GCC diagnostic push/pop. + * Commit 210d510a9: Fix compilation with expression template scalar type. + * Commit efd72cddc: Backport AVX512 implementation to 3.3. + * Commit 5e484fa11: Fix StlDeque compilation issue with GCC 10. + * Commit a796be81a: Avoid false-positive test results in non-linear optimization tests + * Commit 9f202c6f1: Fix undefined behaviour caused by uncaught exceptions in OMP section of parallel GEBP kernel. + * Commit 4707c3aa8: Fix a bug with half-precision floats on GPUs. +* Fixed warnings + * Commit 14db78c53: Fix some maybe-uninitialized warnings in AmbiVector.h and test bdcsvd. + * Commit f1b1f13d3: silent cmake warnings in Lapack CMakeLists.txt + * Commit 8fb28db12: Rename variable which shadows class name in Polynomials module. + * Commit f1c12d8ff: Workaround gcc's `alloc-size-larger-than=` warning in DenseStorage.h + * Commit 6870a39fe: Hide some unused variable warnings in g++8.1 in Tensor contraction mapper. + * Commit bb9981e24: Fix gcc 8.1 warning: "maybe use uninitialized" in std tests + * Commit eea99eb4e: Fix always true warning with gcc 4.7in test numext. + * Commit 65a6d4151: Fix nonnull-compare warning in test geo_alignedbox. + * Commit 74a0c08d7: Disable ignoring attributes warning in vectorization logic test. 
+ * Commit 6c4d57dc9: Fix a gcc7 warning about bool * bool in abs2 default implementation. + * Commit 89a86ed42: Fix a warning in SparseSelfAdjointView about a branch statement always evaluation to false. + +## [3.3.8-rc1] - 2020-09-14 + +Changes since 3.3.7: + +* General bug fixes + * #1974: Fix issue when reserving an empty sparse matrix + * #1823: Fix incorrect use of `std::abs` + * #1788: Fix rule-of-three violations inside the stable modules. This fixes deprecated-copy warnings when compiling with GCC>=9 Also protect some additional Base-constructors from getting called by user code code (#1587) + * #1796: Make matrix squareroot usable for Map and Ref types + * #1281: Fix AutoDiffScalar's `make_coherent` for nested expression of constant ADs. + * #1761: Fall back `is_integral` to `std::is_integral` in c++11 and fix `internal::is_integral` with MSVC 2013 and older. + * #1741: Fix `self_adjoint*matrix`, `triangular*matrix`, and `triangular^1*matrix` with a destination having a non-trivial inner-stride + * #1741: Fix SelfAdjointView::rankUpdate and product to triangular part for destination with non-trivial inner stride + * #1741: Fix `C.noalias() = A*C;` with `C.innerStride()!=1` + * #1695: Fix a numerical robustness issue in BDCSVD + * #1692: Enable enum as sizes of Matrix and Array + * #1689: Fix used-but-marked-unused warning + * #1679: Avoid possible division by 0 in complex-schur + * #1676: Fix C++17 template deduction in DenseBase + * #1669: Fix PartialPivLU/inverse with zero-sized matrices. + * #1557: Fix RealSchur and EigenSolver for matrices with only zeros on the diagonal. +* Performance related fixes + * #1562: Optimize evaluation of small products of the form s*A*B by rewriting them as: s*(A.lazyProduct(B)) to save a costly temporary. Measured speedup from 2x to 5x... 
+ * Commit 165db26dc and 8ee2e10af: Fix performance issue with SimplicialLDLT for complexes coefficients +* Misc commits + * Commit 5f1082d0b: Fix `QuaternionBase::cast` for quaternion map and wrapper. + * Commit a153dbae9: Fix case issue with Lapack unit tests. + * Commit 3d7e2a1f3: Fix possible conflict with an externally defined "real" type when using gcc-5. + * Commit 1760432f6: Provide `numext::[u]int{32,64}_t`. + * Commit 3d18879fc: Initialize isometric transforms like affine transforms. + * Commit 160c0a340: Change typedefs from private to protected to fix MSVC compilation. + * Commit 3cf273591: Fix compilation of FFTW unit test. + * Commit 6abc9e537: Fix compilation of BLAS backend and frontend. + * Commit 47e2f8a42: Fix real/imag namespace conflict. + * Commit 71d0402e3: Avoid throwing in destructors. + * Commit 0dd9643ad: Fix precision issue in SelfAdjointEigenSolver.h + * Commit 6ed74ac97: Make digits10() return an integer. + * Commit 841d844f9: Use pade for matrix exponential also for complex values. + * Commit 4387298e8: Cast Index to RealScalar in SVDBase to fix an issue when RealScalar is not implicitly convertible to Index. + * Commit fe8cd812b: Provide `EIGEN_HAS_C99_MATH` when using MSVC. + * Commit 7c4208450: Various fixes in polynomial solver and its unit tests. + * Commit e777674a8 and 4415d4e2d: Extend polynomial solver unit tests to complexes. + * Commit 222ce4b49: Automatically switch between EigenSolver and ComplexEigenSolver, and fix a few Real versus Scalar issues. + * Commit 5110d803e: Change license from LGPL to MPL2 with agreement from David Harmon. (grafted from 2df4f0024666a9085fe47f14e2290bd61676dbbd ) + * Commit 7b93328ba: Enable construction of `Ref` from a runtime vector. + * Commit c28ba89fe: Fix a problem of old gcc versions having problems with recursive #pragma GCC diagnostic push/pop. + * Commit 210d510a9: Fix compilation with expression template scalar type. 
+* Fixed warnings + * Commit 14db78c53: Fix some maybe-uninitialized warnings in AmbiVector.h and test bdcsvd. + * Commit f1b1f13d3: silent cmake warnings in Lapack CMakeLists.txt + * Commit 8fb28db12: Rename variable which shadows class name in Polynomials module. + * Commit f1c12d8ff: Workaround gcc's `alloc-size-larger-than=` warning in DenseStorage.h + * Commit 6870a39fe: Hide some unused variable warnings in g++8.1 in Tensor contraction mapper. + * Commit bb9981e24: Fix gcc 8.1 warning: "maybe use uninitialized" in std tests + * Commit eea99eb4e: Fix always true warning with gcc 4.7in test `numext`. + * Commit 65a6d4151: Fix nonnull-compare warning in test `geo_alignedbox`. + * Commit 74a0c08d7: Disable ignoring attributes warning in vectorization logic test. + * Commit 6c4d57dc9: Fix a gcc7 warning about bool * bool in abs2 default implementation. + * Commit efd72cddc: Backport AVX512 implementation to 3.3. + * Commit 5e484fa11: Fix StlDeque compilation issue with GCC 10. + * Commit 89a86ed42: Fix a warning in SparseSelfAdjointView about a branch statement always evaluation to false. + * Commit dd6de618: Fix a bug with half-precision floats on GPUs. + +## [3.3.7] - 2018-12-11 + +Changes since 3.3.6: + +* #1643: Fix compilation with GCC>=6 and compiler optimization turned off. + +## [3.3.6] - 2018-12-10 + +Changes since 3.3.5: + +* #1617: Fix triangular solve crashing for empty matrix. +* #785: Make dense Cholesky decomposition work for empty matrices. +* #1634: Remove double copy in move-ctor of non movable Matrix/Array. +* Changeset a2d6c106a450: Workaround weird MSVC bug. +* #1637 Workaround performance regression in matrix products with gcc>=6 and clang>=6.0. +* Changeset 9ccbaaf3dd4c: Fix some implicit 0 to Scalar conversions. +* #1605: Workaround ABI issue with vector types (aka `__m128`) versus scalar types (aka float). +* Changeset 148e579cc004: Fix for gcc<4.6 regarding usage of #pragma GCC diagnostic push/pop. 
+* Changeset bc000deaae45: Fix conjugate-gradient for right-hand-sides with a very small magnitude. +* Changeset 5be00b0e2964: Fix product of empty arrays (returned 0 instead of 1). +* #1590: Fix collision with some system headers defining the macro FP32. +* #1584: Fix possible undefined behavior in random generation. +* Changeset e4127b0f7d3b: Fix fallback to BLAS for rankUpdate. +* Fixes for NVCC 9. +* Fix matrix-market IO. +* Various fixes in the doc. +* Various minor warning fixes/workarounds. + +## [3.3.5] - 2018-07-23 + +Changes since 3.3.4: + +* General bug fixes: + * Fix GeneralizedEigenSolver when requesting for eigenvalues only (ab3fa2e12308) + * #1560 fix product with a 1x1 diagonal matrix (483beabab9bf) + * #1543: fix linear indexing in generic block evaluation + * Fix compilation of product with inverse transpositions (e.g., `mat * Transpositions().inverse()`) (170914dbbcc3) + * #1509: fix `computeInverseWithCheck` for complexes (a2a2c3c86507) + * #1521: avoid signalling `NaN` in hypot and make it std::complex<> friendly (b18e2d422b09). 
+ * #1517: fix triangular product with unit diagonal and nested scaling factor: `(s*A).triangularView()*B` (c24844195d90) + * Fix compilation of stableNorm for some odd expressions as input (33b972d8b384) + * #1485: fix linking issue of non template functions (d18877f18d8e) + * Fix overflow issues in BDCSVD (7a875acfb05f) + * #1468: add missing `std::` to `memcpy` (32a6db0f8cd5) + * #1453: fix Map with non-default inner-stride but no outer-stride (1ca9072b51d8) + * Fix mixing types in sparse matrix products (4ead16cdd6c8) + * #1544: Generate correct Q matrix in complex case (39125654ce9e) + * #1461: fix compilation of `Map::x()` (9a266e5118cf) + +* Backends: + * Fix MKL backend for symmetric eigenvalues on row-major matrices (eab7afe25273) + * #1527: fix support for MKL's VML (86a939451c75) + * Fix incorrect ldvt in LAPACKE call from JacobiSVD (bfc66e8b9a3b) + * Fix support for MKL's BLAS when using `MKL_DIRECT_CALL` (9df7f3d8e9cd, 3108fbf76708, 292dea7922e7) + * Use MKL's lapacke.h header when using MKL (070b5958e0ae) + +* Diagnostics: + * #1516: add assertion for out-of-range diagonal index in `MatrixBase::diagonal(i)` (273738ba6f6e) + * Add static assertion for fixed sizes `Ref<>` (1724dae8b834) + * Add static assertion on selfadjoint-view's UpLo parameter. (74daf12e525e, 190b46dd1f05) + * #1479: fix failure detection in LDLT (c20043c8fd64) + +* Compiler support: + * #1555: compilation fix with XLC + * Workaround MSVC 2013 ambiguous calls (c92536d92647) + * Adds missing `EIGEN_STRONG_INLINE` to help MSVC properly inlining small vector calculations (01fb6217335b) + * Several minor warning fixes: f90d136c8445, 542fb03968c2, "used uninitialized" (7634a44bfe11), Wint-in-bool-context (3d1795da28c2, d1c2d6683c55) + * #1428: make NEON vectorization compilable by MSVC. 
(* 1e2d2693b911, 927d023ceaab) + * Fix compilation and SSE support with PGI compiler (bb87f618bfc3 450c5e5d2771) + * #1555: compilation fix with XLC (20ca86888e70) + * #1520: workaround some `-Wfloat-equal` warnings by calling `std::equal_to` (1c4fdad7bd6f) + * Make the TensorStorage class compile with clang 3.9 (a7144f8d6a94) + * Misc: some old compiler fixes (b60cbbef3791) + * Fix MSVC warning C4290: C++ exception specification ignored except to indicate a function is not `__declspec(nothrow)` (3df78d5afc1e) + +* Architecture support: + * Several AVX512 fixes for `log`, `sqrt`, `rsqrt`, non `AVX512ER` CPUs, `apply_rotation_in_the_plane` 5c59564bfb92 1939c971a3db c2f9e6cb37e5, 609e425166f6. + * AltiVec fixes: 1641a6cdd5a4 + * NEON fixes: const-cast (877a2b64c9ba), compilation of Jacobi rotations (bc837b797559,#1436). + * Changeset 971b32440c74: Define `pcast<>` for SSE types even when AVX is enabled. (otherwise float are silently reinterpreted as int instead of being converted) + * #1494: makes `pmin`/`pmax` behave on Altivec/VSX as on x86 regarding NaNs (892c0a79ce93) + +* Documentation: + * Update manual pages regarding BDCSVD (#1538) + * Add aliasing in common pitfalls (656712d48f6b) + * Update `aligned_allocator` (6fc0f2be70a4) + * #1456: add perf recommendation for LLT and storage format (55fbf4fedd04, 9fd138e2b333) + * #1455: Cholesky module depends on Jacobi for rank-updates (b87875abf8dc) + * #1458: fix documentation of LLT and LDLT `info()` method (ac2c97edff07) + * Warn about constness in `LLT::solveInPlace` (51e1aa153957) + * Fix laziness of `operator*` with CUDA (fa77d713359d) + * #336: improve doc for `PlainObjectBase::Map` (18868228adae) + +* Other general improvements: + * Enable linear indexing in generic block evaluation (15752027ec2f, 80af7d6a47c1, #1543). + * Fix packet and alignment propagation logic of `Block` expressions. In particular, `(A+B).col(j)` now preserves vectorisation. 
(9c9e90f6db7e) + * Several fixes regarding custom scalar type support: hypot (385d8b5e42c2), boost-multiprec (5f71579a2d3f), literal casts (e6577f3c3049, fbb0c510c52f) + * LLT: avoid making a copy when decomposing in place (9d03711df8bc), const the arg to `solveInPlace()` to allow passing `.transpose()`, `.block()`, etc. (0137ed4f19b6). + * Add possibility to overwrite `EIGEN_STRONG_INLINE` (6d6e5fcd4356) + * #1528: use `numeric_limits::min()` instead of `1/highest()` that might underflow (9ff315024335) + * #1532: disable `stl::*_negate` in C++17 (they are deprecated) (3fb42ff7b278) + * Add C++11 `max_digits10` for half (70ac6c923001) + * Make sparse QR result sizes consistent with dense QR (2136cfa17e28) + +* Unsupported/unit-tests/cmake/invisible internals/etc. + * #1484: restore deleted line for 128 bits long doubles, and improve dispatching logic. (c8e663fe87ec) + * #1462: remove all occurrences of the deprecated `__CUDACC_VER__` macro by introducing `EIGEN_CUDACC_VER` (e7c065ec717b) + * Changeset fea50d40ea79: Fix oversharding bug in parallelFor. + * Changeset 866d222d6065: commit 45e9c9996da790b55ed9c4b0dfeae49492ac5c46 (HEAD -> memory_fix) + * Changeset 48048172e5aa: Fix int versus Index + * Changeset 906a98fe39c3: fix linking issue + * Changeset 352489edbe36: Fix short vs long + * Changeset 81e94eea024c: Fix cmake scripts with no fortran compiler + * Changeset 8bd392ca0e3f: add cmake-option to enable/disable creation of tests + * Changeset 02c0cef97fb5: Use col method for column-major matrix + * Changeset a8d2459f8e1f: #1449: fix `redux_3` unit test + * Changeset e90a14609a56: Fix uninitialized output argument. 
+ * Changeset 5d40715db6a7: Handle min/max/inf/etc issue in `cuda_fp16.h` directly in `test/main.h` + * Changeset 2f9de522457b: Add tests for sparseQR results (value and size) covering bugs 1522 and 1544 + * Changeset 4662c610c13c: `SelfAdjointView<...,Mode>` causes a static assert since commit d820ab9edc0b + * Changeset 96134409fc91: weird compilation issue in `mapped_matrix.cpp` + +## [3.3.4] - 2017-06-15 + +Changes since 3.3.3: + +* General: + * Improve speed of Jacobi rotation when mixing complex and real types. + * #1405: enable StrictlyLower/StrictlyUpper triangularView as the destination of matrix*matrix products. + * UmfPack support: enable changes in the control settings and add report functions. + * #1423: fix LSCG's Jacobi preconditioner for row-major matrices. + * #1424: fix compilation issue with abs and unsigned integers as scalar type. + * #1410: fix lvalue propagation of Array/Matrix-Wrapper with a const nested expression. + * #1403: fix several implicit scalar type conversion making SVD decompositions compatible with ceres::Jet. + * Fix some real-to-scalar-to-real useless conversions in `ColPivHouseholderQR`. +* Regressions: + * Fix `dense * sparse_selfadjoint_view` product. + * #1417: make LinSpace compatible with std::complex. + * #1400: fix `stableNorm` alignment issue with `EIGEN_DONT_ALIGN_STATICALLY`. + * #1411: fix alignment issue in `Quaternion`. + * Fix compilation of operations between nested Arrays. + * #1435: fix aliasing issue in expressions like: `A = C - B*A`. +* Others: + * Fix compilation with gcc 4.3 and ARM NEON. + * Fix prefetches on ARM64 and ARM32. + * Fix out-of-bounds check in COLAMD. + * Few minor fixes regarding nvcc/CUDA support, including #1396. + * Improve cmake scripts for Pastix and BLAS detection. + * #1401: fix compilation of `cond ? x : -x` with `x` an `AutoDiffScalar` + * Fix compilation of matrix log with Map as input. 
+ * Add specializations of `std::numeric_limits` for `Eigen::half` and `AutoDiffScalar` + * Fix compilation of streaming nested Array, i.e., `std::cout << Array>` + +## [3.3.3] - 2017-02-21 + +Changes since 3.3.2: + +* General: + * Improve multi-threading heuristic for matrix products with a small number of columns. + * #1395: fix compilation of JacobiSVD for vector types. + * Fix pruning in `(sparse*sparse).pruned()` when the result is nearly dense. + * #1382: move using `std::size_t`/`ptrdiff_t` to Eigen's namespace. + * Fix compilation and inlining when using clang-cl with visual studio. + * #1392: fix `#include ` with mpl2-only. +* Regressions: + * #1379: fix compilation in `sparse*diagonal*dense` with OpenMP. + * #1373: add missing assertion on size mismatch with compound assignment operators (e.g., mat += mat.col(j)) + * #1375: fix cmake installation with cmake 2.8. + * #1383: fix LinSpaced with integers for `LinSpaced(n,0,n-1)` with `n==0` or the `high(sparse*sparse)`. +* Others: + * Fix ARM NEON wrapper for 16 byte systems. + * #1391: include IO.h before DenseBase to enable its usage in DenseBase plugins. + * #1389: fix std containers support with MSVC and AVX. + * #1380: fix matrix exponential with `Map<>`. + * #1369: fix type mismatch warning with OpenMP. + * Fix usage of `size_t` instead of Index in self-adjoint `matrix * vector` + * #1378: fix doc (`DiagonalIndex` vs `Diagonal`). + +## [3.3.2] - 2017-01-18 + +Changes since 3.3.1: + +* General: + * Add `transpose`, `adjoint`, `conjugate` methods to `SelfAdjointView` (useful to write generic code) + * Make sure that `HyperPlane::transform` maintains a unit normal vector in the Affine case. + * Several documentation improvements, including: several doxygen workarounds, #1336, #1370, StorageIndex, selfadjointView, sparseView(), sparse triangular solve, AsciiQuickReference.txt, ... +* Regressions: + * #1358: fix compilation of `sparse += sparse.selfadjointView()`. 
+ * #1359: fix compilation of `sparse /=scalar`, `sparse *=scalar`, and `col_major_sparse.row() *= scalar`. + * #1361: fix compilation of mat=perm.inverse() + * Some fixes in sparse coeff-wise binary operations: add missing `.outer()` member to iterators, and properly report storage order. + * Fix aliasing issue in code such as `A.triangularView() = B*A.selfadjointView()*B.adjoint()` +* Performance: + * Improve code generation for `mat*vec` on some compilers. + * Optimize horizontal adds in SSE3 and AVX. + * Speed up row-major TRSM (triangular solve with a matrix as right-hand-side) by reverting `vec/y` to `vec*(1/y)`. The rationale is: + * div is extremely costly + * this is consistent with the column-major case + * this is consistent with all other BLAS implementations + * Remove one temporary in `SparseLU::solve()` +* Others: + * Fix BLAS backend for symmetric rank K updates. + * #1360: fix `-0` vs `+0` issue with Altivec + * #1363: fix mingw's ABI issue + * #1367: fix compilation with gcc 4.1. + * Fix ABI issue with AVX and old gcc versions. + * Fix some warnings with ICC, Power8, etc. + * Fix compilation with MSVC 2017 + +## [3.3.1] - 2016-12-06 + +Changes since 3.3.0: + +* #426: add operators `&&` and `||` to dense and sparse matrices (only dense arrays were supported) +* #1319: add support for CMake's imported targets. +* #1343: fix compilation regression in `array = matrix_product` and `mat+=selfadjoint_view` +* Fix regression in assignment of sparse block to sparse block. +* Fix a memory leak in `Ref` and `Ref`. +* #1351: fix compilation of random with old compilers. +* Fix a performance regression in (mat*mat)*vec for which mat*mat was evaluated multiple times. +* Fix a regression in `SparseMatrix::ReverseInnerIterator` +* Fix performance issue of products for dynamic size matrices with fixed max size. +* implement `float`/`std::complex` for ZVector +* Some fixes for expression-template scalar-types +* #1356: fix undefined behavior with nullptr. 
+* Workaround some compilation errors with MSVC and MSVC/clr +* #1348: document `EIGEN_MAX_ALIGN_BYTES` and `EIGEN_MAX_STATIC_ALIGN_BYTES`, and reflect in the doc that `EIGEN_DONT_ALIGN*` are deprecated. +* Bugs #1346,#1347: make Eigen's installation relocatable. +* Fix some harmless compilation warnings. + +## [3.3] - 2016-11-10 + +For a comprehensive list of change since the 3.2 series, see this [page](https://www.eigen.tuxfamily.org/index.php?title=3.3). + + +Main changes since 3.3-rc2: +* Fix regression in printing sparse expressions. +* Fix sparse solvers when using a SparseVector as the result and/or right-hand-side. + +## [3.3-rc2] - 2016-11-04 + +For a comprehensive list of change since the 3.2 series, see this [page](https://www.eigen.tuxfamily.org/index.php?title=3.3). + +Main changes since 3.3-rc1: +* Core module + * Add supports for AVX512 SIMD instruction set. + * Bugs #698 and #1004: Improve numerical robustness of LinSpaced methods for both real and integer scalar types ([details](http://eigen.tuxfamily.org/dox-devel/classEigen_1_1DenseBase.html#aaef589c1dbd7fad93f97bd3fa1b1e768)). + * Fix a regression in `X = (X*X.transpose())/scalar` with `X` rectangular (`X` was resized before the evaluation). + * #1311: Fix alignment logic in some cases of `(scalar*small).lazyProduct(small)` + * #1317: fix a performance regression from 3.2 with clang and some nested block expressions. + * #1308: fix compilation of some small products involving nullary-expressions. + * #1333: Fix a regression with `mat.array().sum()` + * #1328: Fix a compilation issue with old compilers introduced in 3.3-rc1. + * #1325: Fix compilation on NEON with clang + * Properly handle negative inputs in vectorized sqrt. + * Improve cost-model to determine the ideal number of threads in matrix-matrix products. +* Geometry module + * #1304: Fix `Projective * scaling` and `Projective *= scaling`. 
+ * #1310: Workaround a compilation regression from 3.2 regarding triangular * homogeneous + * #1312: Quaternion to AxisAngle conversion now ensures the angle will be in the range `[0,pi]`. This also increases accuracy when `q_w` is negative. +* Tensor module + * Add support for OpenCL. + * Improved random number generation. +* Other + * #1330: SuiteSparse, explicitly handle the fact that Cholmod does not support single precision float numbers. + * SuiteSparse, fix SPQR for rectangular matrices + * Fix compilation of `qr.inverse()` for column and full pivoting variants + +## [3.2.10] - 2016-10-04 + +Changes since 3.2.9: + +Main fixes and improvements: +* #1272: Core module, improve comma-initializer in handling empty matrices. +* #1276: Core module, remove all references to `std::binder*` when C++11 is enabled (those are deprecated). +* #1304: Geometry module, fix `Projective * scaling` and `Projective *= scaling`. +* #1300: Sparse module, compilation fix for some block expression and SPQR support. +* Sparse module, fix support for row (resp. column) of a column-major (resp. row-major) sparse matrix. +* LU module, fix 4x4 matrix inversion for non-linear destinations. +* Core module, a few fixes regarding custom complex types. +* #1275: backported improved random generator from 3.3 +* Workaround MSVC 2013 compilation issue in Reverse +* Fix UmfPackLU constructor for expressions. +* #1273: fix shortcoming in eigen_assert macro +* #1249: disable the use of `__builtin_prefetch` for compilers other than GCC, clang, and ICC. +* #1265: fix doc of QR decompositions + +## [3.3-rc1] - 2016-09-22 + +For a comprehensive list of change since the 3.2 series, see this [page](https://www.eigen.tuxfamily.org/index.php?title=3.3). + +Main changes since 3.3-beta2: + +* New features and improvements: + * #645: implement eigenvector computation in GeneralizedEigenSolver + * #1271: add a `SparseMatrix::coeffs()` method returning a linear view of the non-zeros (for compressed mode only). 
+ * #1286: Improve support for custom nullary functors: now the functor only has to expose one relevant operator among `f()`, `f(i)`, `f(i,j)`. + * #1272: improve comma-initializer in handling empty matrices. + * #1268: detect failure in LDLT and report them through info() + * Add support for scalar factor in sparse-selfadjoint `*` dense products, and enable `+=`/`-=` assignment for such products. + * Remove temporaries in product expressions matching `d?=a-b*c` by rewriting them as `d?=a; d?=b*c;` + * Vectorization improvements for some small product cases. + +* Doc: + * #1265: fix outdated doc in QR facto + * #828: improve documentation of sparse block methods, and sparse unary methods. + * Improve documentation regarding nullary functors, and add an example demonstrating the use of nullary expression to perform fancy matrix manipulations. + * Doc: explain how to use Accelerate as a LAPACK backend. + +* Bug fixes and internal changes: + * Numerous fixes regarding support for custom complex types. + * #1273: fix shortcoming in `eigen_assert` macro + * #1278: code formatting + * #1270: by-pass hand written `pmadd` with recent clang versions. + * #1282: fix implicit double to float conversion warning + * #1167: simplify installation of header files using cmake's `install(DIRECTORY ...)` command + * #1283: fix products involving an uncommon `vector.block(..)` expressions. + * #1285: fix a minor regression in LU factorization. + * JacobiSVD now consider any denormal number as zero. + * Numerous fixes regarding support for CUDA/NVCC (including bugs #1266) + * Fix an alignment issue in gemv, symv, and trmv for statically allocated temporaries. + * Fix 4x4 matrix inversion for non-linear destinations. + * Numerous improvements and fixes in half precision scalar type. + * Fix vectorization logic for coeff-based product for some corner cases + * Bugs #1260, #1261, #1264: several fixes in AutoDiffScalar. 
+ +## [3.3-beta2] - 2016-08-26 + +For a comprehensive list of changes since the 3.2 series, see this [page](https://www.eigen.tuxfamily.org/index.php?title=3.3). + +Main changes since 3.3-beta1: + +* Dense features: + * #707: Add support for [inplace](http://eigen.tuxfamily.org/dox-devel/group__InplaceDecomposition.html) dense decompositions. + * #977: normalize(d) left the input unchanged if its norm is 0 or too close to 0. + * #977: add stableNormalize[d] methods: they are analogues to normalize[d] but with careful handling of under/over-flow. + * #279: Implement generic scalar*expr and expr*scalar operators. This is especially useful for custom scalar types, e.g., to enable `float*expr` without conversion. + * New unsupported/Eigen/SpecialFunctions module providing the following coefficient-wise math functions: erf, erfc, lgamma, digamma, polygamma, igamma, igammac, zeta, betainc. + * Add fast reciprocal condition estimators in dense LU and Cholesky factorizations. + * #1230: add support for `SelfadjointView::triangularView()` and `diagonal()` + * #823: add `Quaternion::UnitRandom()` method. + * Add exclusive or operator for bool arrays. + * Relax dependency on MKL for `EIGEN_USE_BLAS` and `EIGEN_USE_LAPACKE`: any BLAS and LAPACK libraries can now be used as backend (see [doc](http://eigen.tuxfamily.org/dox-devel/TopicUsingBlasLapack.html)). + * Add static assertion to `x()`, `y()`, `z()`, `w()` accessors + * #51: avoid dynamic memory allocation in fixed-size rank-updates, matrix products evaluated within a triangular part, and selfadjoint times matrix products. + * #696: enable zero-sized block at compile-time by relaxing the respective assertion + * #779: in `Map`, allows non aligned buffers for buffers smaller than the requested alignment.
+ * Add a complete orthogonal decomposition class: [CompleteOrthogonalDecomposition](http://eigen.tuxfamily.org/dox-devel/classEigen_1_1CompleteOrthogonalDecomposition.html) + * Improve robustness of JacobiSVD with complexes (underflow, noise amplification in complex to real conversion, compare off-diagonal entries to the current biggest diagonal entry instead of the global biggest, null inputs). + * Change Eigen's ColPivHouseholderQR to use a numerically stable norm downdate formula (changeset acce4dd0500f) + * #1214: consider denormals as zero in D&C SVD. This also works around an infinite binary search when compiling with ICC's unsafe optimizations. + * Add log1p for arrays. + * #1193: now `lpNorm` supports empty inputs. + * #1151: remove useless critical section in matrix product + * Add missing non-const reverse method in `VectorwiseOp` (e.g., this enables `A.rowwise().reverse() = ...`) + * Update RealQZ to reduce 2x2 diagonal block of T corresponding to non reduced diagonal block of S to positive diagonal form. + +* Sparse features: + * #632: add support for "dense +/- sparse" operations. The current implementation is based on SparseView to make the dense subexpression compatible with the sparse one. + * #1095: add Cholmod*::logDeterminant/determinant functions. + * Add `SparseVector::conservativeResize()` method + * #946: generalize `Cholmod::solve` to handle any rhs expressions. + * #1150: make IncompleteCholesky more robust by iteratively increasing the shift until the factorization succeeds (with at most 10 attempts) + * #557: make InnerIterator of sparse storage types more versatile by adding default-ctor, copy-ctor/assignment. + * #694: document that `SparseQR::matrixR` is not sorted. + * Block expressions now expose all the information defining the block. + * Fix GMRES returned error. + * #1119: add support for SuperLU 5 + +* Performance improvements: + * #256: enable vectorization with unaligned loads/stores. This concerns all architectures and all sizes.
This new behavior can be disabled by defining `EIGEN_UNALIGNED_VECTORIZE=0` + * Add support for s390x(zEC13) ZVECTOR instruction set. + * Optimize mixing of real with complex matrices by avoiding a conversion from real to complex when the real types do not match exactly. (see 76faf4a9657e) + * Speedup square roots in performance critical methods such as norm, normalize(d). + * #1154: use dynamic scheduling for spmv products. + * #667, #1181: improve perf with MSVC and ICC through `FORCE_INLINE` + * Improve heuristics for switching between coeff-based and general matrix product implementation at compile-time. + * Add vectorization of tanh for float (SSE/AVX) + * Improve cost estimates of numerous functors. + * Numerous improvements regarding half-packet vectorization: coeff-based products (e.g., `Matrix4f*Vector4f` is now vectorized again when using AVX), reductions, linear vs inner traversals. + * Fix performance regression: with AVX, unaligned stores were emitted instead of aligned ones for fixed size assignment. + * #1201: optimize `affine*vector` products. + * #1191: prevent Clang/ARM from rewriting VMLA into VMUL+VADD. + * Small speed-up in `Quaternion::slerp`. + * #1201: improve code generation of affine*vec with MSVC + +* Doc: + * Add [documentation and example](http://eigen.tuxfamily.org/dox-devel/group__MatrixfreeSolverExample.html) for matrix-free solving. + * A new documentation [page](http://eigen.tuxfamily.org/dox-devel/group__CoeffwiseMathFunctions.html) summarizing coefficient-wise math functions. + * #1144: clarify the doc about aliasing in case of resizing and matrix product. + * A new documentation [page](http://eigen.tuxfamily.org/dox-devel/group__DenseDecompositionBenchmark.html) summarizing the true performance of Eigen's dense decomposition algorithms. + +* Misc improvements: + * Allow one generic scalar argument for all binary operators/functions.
+ * Add a `EIGEN_MAX_CPP_VER` option to limit the C++ version to be used, as well as [fine grained options](http://eigen.tuxfamily.org/dox-devel/TopicPreprocessorDirectives.html#title1) to control individual language features. + * A new [ScalarBinaryOpTraits](http://eigen.tuxfamily.org/dox-devel/structEigen_1_1ScalarBinaryOpTraits.html) class allowing to control how different scalar types are mixed. + * `NumTraits` now exposes a `digits10` function making `internal::significant_decimals_impl` deprecated. + * Countless improvements and fixes in Tensors module. + * #1156: fix several function declarations whose arguments were passed by value instead of being passed by reference + * #1164: fix `std::list` and `std::deque` specializations such that our aligned allocator is automatically activated only when the user did not specify an allocator (or specified the default std::allocator). + * #795: mention allocate_shared as a candidate for aligned_allocator. + * #1170: skip calls to memcpy/memmove for empty inputs. + * #1203: by-pass large stack-allocation in stableNorm if `EIGEN_STACK_ALLOCATION_LIMIT` is too small + * Improve constness of blas level-2/3 interface. + * Implement stricter argument checking for SYRK and SY2K + * Countless improvements in the documentation. + * Internal: Remove `posix_memalign`, `_mm_malloc`, and `_aligned_malloc` special paths. + * Internal: Remove custom unaligned loads for SSE + * Internal: introduce `[U]IntPtr` types to be used for casting pointers to integers. + * Internal: `NumTraits` now exposes `infinity()` + * Internal: `EvalBeforeNestingBit` is now deprecated. + * #1213: workaround gcc linking issue with anonymous enums. + * #1242: fix comma initializer with empty matrices. + * #725: make move ctor/assignment noexcept + * Add minimal support for `Array` + * Improve support for custom scalar types based on expression templates (e.g., `boost::multiprecision::number<>` type). All dense decompositions are successfully tested.
+ +* Most visible fixes: + * #1144: fix regression in `x=y+A*x` (aliasing issue) + * #1140: fix usage of `_mm256_set_m128` and `_mm256_setr_m128` in AVX support + * #1141: fix some missing initializations in CholmodSupport + * #1143: workaround gcc bug #10200 + * #1145, #1147, #1148, #1149: numerous fixes in PastixSupport + * #1153: don't rely on `__GXX_EXPERIMENTAL_CXX0X__` to detect C++11 support. + * #1152: fix data race in static initialization of blas routines. + * fix some buffer overflow in product block size computation. + * #96, #1006: fix by value argument in result_of + * #178: clean several `const_cast`. + * Fix compilation in `ceil()` function. + * #698: fix linspaced for integer types. + * #1161: fix division by zero for huge scalar types in cache block size computation. + * #774: fix a numerical issue in Umeyama algorithm that produced unwanted reflections. + * #901: fix triangular-view with unit diagonal of sparse rectangular matrices. + * #1166: fix shortcoming in gemv when the destination is not a vector at compile-time. + * #1172: make `SparseMatrix::valuePtr` and `innerIndexPtr` properly return null for empty matrices + * #537: fix a compilation issue in Quaternion with Apple's compiler + * #1186: fix usage of `vreinterpretq_u64_f64` (NEON) + * #1190: fix usage of `__ARM_FEATURE_FMA` on Clang/ARM + * #1189: fix pow/atan2 compilation for `AutoDiffScalar` + * Fix detection of same input-output when applying permutations, or on solve operations. + * Workaround a division by zero in triangular solve when outerstride==0 + * Fix compilation of `sparse.cast<>().transpose()`. + * Fix double-conversion warnings throughout the code. + * #1207: fix logical-op warnings + * #1222, #1223: fix compilation in `AutoDiffScalar`. + * #1229: fix usage of `Derived::Options` in MatrixFunctions. + * #1224: fix regression in `(dense*dense).sparseView()`. + * #1231: fix compilation regression regarding complex_array/=real_array.
+ * #1221: disable gcc 6 warning: ignoring attributes on template argument. + * Workaround clang/llvm bug 27908 + * #1236: fix possible integer overflow in sparse matrix product. + * #1238: fix `SparseMatrix::sum()` overload for un-compressed mode + * #1240: remove any assumption on NEON vector types + * Improves support for MKL's PARDISO solver. + * Fix support for Visual 2010. + * Fix support for gcc 4.1. + * Fix support for ICC 2016 + * Various Altivec/VSX fixes: exp, support for clang 3.9, + * #1258: fix compilation of `Map::coeffRef` + * #1249: fix compilation with compilers that do not support `__builtin_prefetch` . + * #1250: fix `pow()` for `AutoDiffScalar` with custom nested scalar type. + +## [3.2.9] - 2016-08-18 + +Changes since 3.2.8: + +* Main fixes and improvements: + * Improve numerical robustness of JacobiSVD (backported from 3.3) + * #1017: prevents underflows in `makeHouseholder` + * Fix numerical accuracy issue in the extraction of complex eigenvalue pairs in real generalized eigenvalue problems. + * Fix support for `vector.homogeneous().asDiagonal()` + * #1238: fix `SparseMatrix::sum()` overload for un-compressed mode + * #1213: workaround gcc linking issue with anonymous enums. + * #1236: fix possible integer overflow in sparse-sparse product + * Improve detection of identical matrices when applying a permutation (e.g., `mat = perm * mat`) + * Fix usage of nesting type in blas_traits. 
In practice, this fixes compilation of expressions such as `A*(A*A)^T` + * CMake: fixes support of Ninja generator + * Add a StorageIndex typedef to sparse matrices and expressions to ease porting code to 3.3 (see http://eigen.tuxfamily.org/index.php?title=3.3#Index_typedef) + * #1200: make `aligned_allocator` c++11 compatible (backported from 3.3) + * #1182: improve generality of `abs2` (backported from 3.3) + * #537: fix compilation of Quaternion with Apples's compiler + * #1176: allow products between compatible scalar types + * #1172: make `valuePtr` and `innerIndexPtr` properly return null for empty sparse matrices. + * #1170: skip calls to `memcpy`/`memmove` for empty inputs. + +* Others: + * #1242: fix comma initializer with empty matrices. + * Improves support for MKL's PARDISO solver. + * Fix a compilation issue with Pastix solver. + * Add some missing explicit scalar conversions + * Fix a compilation issue with matrix exponential (unsupported MatrixFunctions module). + * #734: fix a storage order issue in unsupported Spline module + * #1222: fix a compilation issue in AutoDiffScalar + * #1221: shutdown some GCC6's warnings. + * #1175: fix index type conversion warnings in sparse to dense conversion. + +## [3.2.8] - 2016-02-16 + +Changes since 3.2.7: + +* Main fixes and improvements: + * Make `FullPivLU::solve` use `rank()` instead of `nonzeroPivots()`. + * Add `EIGEN_MAPBASE_PLUGIN` + * #1166: fix issue in matrix-vector products when the destination is not a vector at compile-time. + * #1100: Improve cmake/pkg-config support. + * #1113: fix name conflict with C99's "I". + * Add missing delete operator overloads in `EIGEN_MAKE_ALIGNED_OPERATOR_NEW` + * Fix `(A*B).maxCoeff(i)` and similar. + * Workaround an ICE with VC2015 Update1 x64. 
+ * #1156: fix several function declarations whose arguments were passed by value instead of being passed by reference + * #1164: fix `std::list` and `std::deque` specializations such that our aligned allocator is automatically activated only when the user did not specify an allocator (or specified the default `std::allocator`). + +* Others: + * Fix BLAS backend (aka MKL) for empty matrix products. + * #1134: fix JacobiSVD pre-allocation. + * #1111: fix infinite recursion in `sparse_column_major.row(i).nonZeros()` (it now produces a compilation error) + * #1106: workaround a compilation issue in Sparse module for msvc-icc combo + * #1153: remove the usage of `__GXX_EXPERIMENTAL_CXX0X__` to detect C++11 support + * #1143: work-around gcc bug in COLAMD + * Improve support for matrix products with empty factors. + * Fix and clarify documentation of Transform wrt `operator*(MatrixBase)` + * Add a matrix-free conjugate gradient example. + * Fix cost computation in CwiseUnaryView (internal) + * Remove custom unaligned loads for SSE. + * Some warning fixes. + * Several other documentation clarifications. + +## [3.3-beta1] - 2015-12-16 + +For a comprehensive list of changes since the 3.2 series, see this [page](https://www.eigen.tuxfamily.org/index.php?title=3.3). + +Main changes since 3.3-alpha1: + +* Dense features: + * Add `LU::transpose().solve()` and `LU::adjoint().solve()` API. + * Add `Array::rsqrt()` method as a more efficient shortcut for `sqrt().inverse()`. + * Add `Array::sign()` method for real and complexes. + * Add `lgamma`, `erf`, and `erfc` functions for arrays. + * Add support for row/col-wise `lpNorm()`. + * Add missing `Rotation2D::operator=(Matrix2x2)`. + * Add support for `permutation * homogeneous`. + * Improve numerical accuracy in LLT and triangular solve by using true scalar divisions (instead of x * (1/y)). + * Add `EIGEN_MAPBASE_PLUGIN` and `EIGEN_QUATERNION_PLUGIN`. + * #1074: forbid the creation of PlainObjectBase objects.
+ +* Sparse features: + * Add IncompleteCholesky preconditioner. + * Improve support for [matrix-free iterative solvers](http://eigen.tuxfamily.org/dox/group__MatrixfreeSolverExample.html) + * Extend `setFromTriplets` API to allow passing a functor object controlling how to collapse duplicated entries. + * #918: add access to UmfPack return code and parameters. + * Add support for `dense.cwiseProduct(sparse)`, thus enabling `(dense*sparse).diagonal()` expressions. + * Add support to directly evaluate the product of two sparse matrices within a dense matrix. + * #1064: add support for `Ref`. + * Add support for real mul/div `sparse` operations. + * #1086: replace deprecated `UF_long` by `SuiteSparse_long`. + * Make `Ref` more versatile. + +* Performance improvements: + * #1115: enable static alignment and thus small size vectorization on ARM. + * Add temporary-free evaluation of `D.noalias() *= C + A*B`. + * Add vectorization of round, ceil and floor for SSE4.1/AVX. + * Optimize assignment into a `Block` by using Ref and avoiding useless updates in non-compressed mode. This makes row-by-row filling of a row-major sparse matrix very efficient. + * Improve internal cost model leading to faster code in some cases (see changeset 77ff3386b7d2). + * #1090: improve redux evaluation logic. + * Enable unaligned vectorization of small fixed size matrix products. + +* Misc improvements: + * Improve support for `isfinite`/`isnan`/`isinf` in fast-math mode. + * Make the IterativeLinearSolvers module compatible with MPL2-only mode by defaulting to COLAMDOrdering and NaturalOrdering for ILUT and ILLT respectively. + * Avoid any OpenMP calls if multi-threading is explicitly disabled at runtime. + * Make abs2 compatible with custom complex types. + * #1109: use noexcept instead of throw for C++11 compilers. + * #1100: Improve cmake/pkg-config support. + * Countless improvements and fixes in Tensors module.
+ +* Most visible fixes: + * #1105: fix default preallocation when moving from compressed to uncompressed mode in SparseMatrix. + * Fix UmfPackLU constructor for expressions. + * Fix degenerate cases in syrk and trsm BLAS API. + * Fix matrix to quaternion (and angleaxis) conversion for matrix expression. + * Fix compilation of sparse-triangular to dense assignment. + * Fix several minor performance issues in the nesting of matrix products. + * #1092: fix iterative solver ctors for expressions as input. + * #1099: fix missing include for CUDA. + * #1102: fix multiple definition linking issue. + * #1088: fix setIdentity for non-compressed sparse-matrix. + * Fix `SparseMatrix::insert`/`coeffRef` for non-empty compressed matrix. + * #1113: fix name conflict with C99's "I". + * #1075: fix `AlignedBox::sample` for runtime dimension. + * #1103: fix NEON vectorization of `complex` multiplication. + * #1134: fix JacobiSVD pre-allocation. + * Fix ICE with VC2015 Update1. + * Improve cmake install scripts. + +## [3.2.7] - 2015-11-05 + +Changes since 3.2.6: + +* Main fixes and improvements: + * Add support for `dense.cwiseProduct(sparse)`. + * Fix a regression regarding `(dense*sparse).diagonal()`. + * Make the `IterativeLinearSolvers` module compatible with MPL2-only mode by defaulting to `COLAMDOrdering` and `NaturalOrdering` for ILUT and ILLT respectively. + * #266: backport support for c++11 move semantic + * `operator/=(Scalar)` now performs a true division (instead of `mat*(1/s)`) + * Improve numerical accuracy in LLT and triangular solve by using true scalar divisions (instead of `mat * (1/s)`) + * #1092: fix iterative solver constructors for expressions as input + * #1088: fix `setIdentity` for non-compressed sparse-matrix + * #1086: add support for recent SuiteSparse versions + +* Others: + * Add overloads for real-scalar times `SparseMatrix` operations. This avoids real to complex conversions, and also fixes a compilation issue with MSVC.
+ * Use explicit Scalar types for AngleAxis initialization + * Fix several shortcomings in cost computation (avoid multiple re-evaluation in some very rare cases). + * #1090: fix a shortcoming in redux logic for which slice-vectorization plus unrolling might happen. + * Fix compilation issue with MSVC by backporting `DenseStorage::operator=` from devel branch. + * #1063: fix nesting of unsupported/AutoDiffScalar to prevent dead references when computing second-order derivatives + * #1100: remove explicit `CMAKE_INSTALL_PREFIX` prefix to conform to cmake install's `DESTINATION` parameter. + * unsupported/ArpackSupport is now properly installed by make install. + * #1080: warning fixes + +## [3.2.6] - 2015-10-01 + +Changes since 3.2.5: + +* fix some compilation issues with MSVC 2013, including bugs #1000 and #1057 +* SparseLU: fixes to support `EIGEN_DEFAULT_TO_ROW_MAJOR` (#1053), and for empty (#1026) and some structurally rank deficient matrices (#792) +* #1075: fix `AlignedBox::sample()` for Dynamic dimension +* fix regression in AMD ordering when a column has only one off-diagonal non-zero (used in sparse Cholesky) +* fix Jacobi preconditioner with zero diagonal entries +* fix Quaternion identity initialization for non-implicitly convertible types +* #1059: fix `predux_max` for NEON +* #1039: fix some issues when redefining `EIGEN_DEFAULT_DENSE_INDEX_TYPE` +* #1062: fix SelfAdjointEigenSolver for RowMajor matrices +* MKL: fix support for the 11.2 version, and fix a naming conflict (#1067) + * #1033: explicit type conversion from 0 to RealScalar + +## [3.3-alpha1] - 2015-09-04 + +See the [announcement](https://www.eigen.tuxfamily.org/index.php?title=3.3). 
+ +## [3.2.5] - 2015-06-16 + +Changes since 3.2.4: + +* Changes with main impact: + * Improve robustness of SimplicialLDLT to semidefinite problems by correctly handling structural zeros in AMD reordering + * Re-enable supernodes in SparseLU (fix a performance regression in SparseLU) + * Use zero guess in `ConjugateGradients::solve` + * Add `PermutationMatrix::determinant` method + * Fix `SparseLU::signDeterminant()` method, and add a SparseLU::determinant() method + * Allows Lower|Upper as a template argument of CG and MINRES: in this case the full matrix will be considered + * #872: remove usage of std::bind* functions (deprecated in c++11) + +* Numerical robustness improvements: + * #1014: improve numerical robustness of the 3x3 direct eigenvalue solver + * #1013: fix 2x2 direct eigenvalue solver for identical eigenvalues + * #824: improve accuracy of `Quaternion::angularDistance` + * #941: fix an accuracy issue in ColPivHouseholderQR by continuing the decomposition on a small pivot + * #933: improve numerical robustness in RealSchur + * Fix default threshold value in SPQR + +* Other changes: + * Fix usage of `EIGEN_NO_AUTOMATIC_RESIZING` + * Improved support for custom scalar types in SparseLU + * Improve cygwin compatibility + * #650: fix an issue with sparse-dense product and rowmajor matrices + * #704: fix MKL support (HouseholderQR) + * #705: fix handling of Lapack potrf return code (LLT) + * #714: fix matrix product with OpenMP support + * #949: add static assertions for incompatible scalar types in many of the dense decompositions + * #957, #1000: workaround MSVC/ICC compilation issues when using sparse blocks + * #969: fix ambiguous calls to Ref + * #972, #986: add support for coefficient-based product with 0 depth + * #980: fix taking a row (resp. column) of a column-major (resp. 
row-major) sparse matrix + * #983: fix an alignment issue in Quaternion + * #985: fix RealQZ when either matrix had zero rows or columns + * #987: fix alignment guess in diagonal product + * #993: fix a pitfall with matrix.inverse() + * #996, #1016: fix scalar conversions + * #1003: fix handling of pointers non aligned on scalar boundary in slice-vectorization + * #1010: fix member initialization in IncompleteLUT + * #1012: enable alloca on Mac OS or if alloca is defined as macro + * Doc and build system: #733, #914, #952, #961, #999 + +## [3.2.4] - 2015-01-21 + +Changes since 3.2.3: + +* Fix compilation regression in Rotation2D +* #920: fix compilation issue with MSVC 2015. +* #921: fix utilization of bitwise operation on enums in `first_aligned`. +* Fix compilation with NEON on some platforms. + +## [3.2.3] - 2014-12-16 + +Changes since 3.2.2: + +* Core: + * Enable `Mx0 * 0xN` matrix products. + * #859: fix returned values for vectorized versions of `exp(NaN)`, `log(NaN)`, `sqrt(NaN)` and `sqrt(-1)`. + * #879: tri1 = mat * tri2 was compiling and running incorrectly if tri2 was not numerically triangular. Workaround the issue by evaluating mat*tri2 into a temporary. + * #854: fix numerical issue in SelfAdjointEigenSolver::computeDirect for 3x3 matrices. + * #884: make sure there no call to malloc for zero-sized matrices or for a Ref<> without temporaries. + * #890: fix aliasing detection when applying a permutation. + * #898: MSVC optimization by adding inline hint to const_cast_ptr. + * #853: remove enable_if<> in Ref<> ctor. + +* Dense solvers: + * #894: fix the sign returned by LDLT for multiple calls to `compute()`. + * Fix JacobiSVD wrt underflow and overflow. + * #791: fix infinite loop in JacobiSVD in the presence of NaN. + +* Sparse: + * Fix out-of-bounds memory write when the product of two sparse matrices is completely dense and performed using pruning.
+ * UmfPack support: fix redundant evaluation/copies when calling `compute()`, add support for generic expressions as input, and fix extraction of the L and U factors (#911). + * Improve `SparseMatrix::block` for const matrices (the generic path was used). + * Fix memory pre-allocation when permuting inner vectors of a sparse matrix. + * Fix `SparseQR::rank` for a completely empty matrix. + * Fix `SparseQR` for row-major inputs. + * Fix `SparseLU::absDeterminant` and add respective unit test. + * BiCGSTAB: make sure that good initial guesses are not destroyed by a bad preconditioner. + +* Geometry: + * Fix `Hyperplane::Through(a,b,c)` when points are aligned or identical. + * Fix linking issues in OpenGLSupport. + +* OS, build system and doc: + * Various compilation fixes including: #821, #822, #857, #871, #873. + * Fix many compilation warnings produced by recent compilers including: #909. + * #861: enable posix_memalign with PGI. + * Fix BiCGSTAB doc example. + +## [3.2.2] - 2014-08-04 + +Changes since 3.2.1: + +* Core: + * Relax Ref such that `Ref` accepts a `RowVectorXf` which can be seen as a degenerate `MatrixXf(1,N)` + * Fix performance regression for the vectorization of sub columns/rows of matrices. + * `EIGEN_STACK_ALLOCATION_LIMIT`: Raise its default value to 128KB, make use of it to assert on maximal fixed size object, and allows it to be 0 to mean "no limit". + * #839: Fix 1x1 triangular matrix-vector product. + * #755: `CommaInitializer` produced wrong assertions in absence of Return-Value-Optimization. + +* Dense solvers: + * Add a `rank()` method with threshold control to JacobiSVD, and make solve uses it to return the minimal norm solution for rank-deficient problems. + * Various numerical fixes in JacobiSVD, including:#843, and the move from Lapack to Matlab strategy for the default threshold. + * Various numerical fixes in LDLT, including the case of semi-definite complex matrices. + * Fix `ColPivHouseholderQR::rank()`. 
+ * #222: Make temporary matrix column-major independently of `EIGEN_DEFAULT_TO_ROW_MAJOR` in BlockHouseholder. + +* Sparse: + * #838: Fix `dense * sparse` and `sparse * dense` outer products and detect outer products from either the lhs or rhs. + * Make the ordering method of SimplicialL[D]LT configurable. + * Fix regression in the restart mechanism of BiCGSTAB. + * #836: extend SparseQR to support more columns than rows. + * #808: Use double instead of float for the increasing size ratio in `CompressedStorage::resize`, fix implicit conversions from int/longint to float/double, and fix `set_from_triplets` temporary matrix type. + * #647: Use `smart_copy` instead of bitwise memcpy in CompressedStorage. + * GMRES: Initialize essential Householder vector with correct dimension. + +* Geometry: + * #807: Missing scalar type cast in `umeyama()` + * #806: Missing scalar type cast in `Quaternion::setFromTwoVectors()` + * #759: Removed hard-coded double-math from `Quaternion::angularDistance`. + +* OS, build system and doc: + * Fix compilation with Windows CE. + * Fix some ICEs with VC11. + * Check IMKL version for compatibility with Eigen + * #754: Only inserted (`!defined(_WIN32_WCE)`) analog to alloc and free implementation. + * #803: Avoid `char*` to `int*` conversion. + * #819: Include path of details.h file. + * #738: Use the "current" version of cmake project directories to ease the inclusion of Eigen within other projects. + * #815: Fix doc of FullPivLU wrt permutation matrices. + * #632: doc: Note that `dm2 = sm1 + dm1` is not possible + * Extend AsciiQuickReference (real, imag, conjugate, rot90) + +## [3.2.1] - 2014-02-26 + +Changes since 3.2.0: + +* Eigen2 support is now deprecated and will be removed in version 3.3. +* Core: + * Bug fix for Ref object containing a temporary matrix. + * #654: Allow construction of row vector from 1D array. + * #679: Support `cwiseMin()` and `cwiseMax()` on maps. + * Support `conservativeResize()` on vectors. 
+ * Improve performance of vectorwise and replicate expressions. + * #642: Add vectorization of sqrt for doubles, and make sqrt really safe if `EIGEN_FAST_MATH` is disabled. + * #616: Try harder to align columns when printing matrices and arrays. + * #579: Add optional run-time parameter to fixed-size block methods. + * Implement `.all()` and `.any()` for zero-sized objects + * #708: Add placement new and delete for arrays. + * #503: Better C++11 support. +* Dense linear algebra: + * #689: Speed up some matrix-vector products by using aligned loads if possible. + * Make solve in `FullPivHouseholderQR` return least-square solution if there is no exact solution. + * #678: Fix `fullPivHouseholderQR` for rectangular matrices. + * Fix a 0/0 issue in JacobiSVD. + * #736: Wrong result in `LDLT::isPositiveDefinite()` for semi-definite matrices. + * #740: Fix overflow issue in `stableNorm()`. + * Make pivoting HouseholderQR compatible with custom scalar types. +* Geometry: + * Fix compilation of Transform * UniformScaling +* Sparse matrices: + * Fix elimination tree and SparseQR for fat rectangular matrices. + * #635: add `isCompressed` to `MappedSparseMatrix` for compatibility. + * #664: Support iterators without `operator<` in `setFromTriplets()`. + * Fixes in SparseLU: infinite loop, aliasing issue when solving, overflow in memory allocation, use exceptions only if enabled (#672). + * Fixes in SparseQR: reduce explicit zero, assigning result to map, assert catching non-conforming sizes, memory leak. + * #681: Uninitialized value in CholmodSupport which may lead to incorrect results. + * Fix some issues when using a non-standard index type (#665 and more) + * Update constrained CG (unsupported module) to Eigen3. +* OS and build system: + * MacOS put OpenGL header files somewhere else from where we expected it. + * Do not assume that `alloca()` is 16-byte aligned on Windows. + * Compilation fixes when using ICC with Visual Studio. 
+ * Fix Fortran compiler detection in CMake files. +* Fix some of our tests (bugs #744 and #748 and more). +* Fix a few compiler warnings (bug #317 and more). +* Documentation fixes (bugs #609, #638 and #739 and more). + +## [3.1.4] - 2013-08-02 + +Changes since 3.1.3: + +* #620: Fix robustness and performance issues in JacobiSVD::solve. +* #613: Fix accuracy of SSE sqrt for very small numbers. +* #608: Fix sign computation in LDLT. +* Fix write access to CwiseUnaryView expressions. +* Fix compilation of `transposeInPlace()` for Array expressions. +* Fix non const `data()` member in Array and Matrix wrappers. +* Fix a few warnings and compilation issues with recent compiler versions. +* Documentation fixes. + +## [3.0.7] - 2013-08-02 + +Changes since 3.0.6: + +* Fix traits of `Map`. +* Fix a few warnings (#507) and documentation (#531). + +## [3.2.0] - 2013-07-24 + +Major new features and optimizations since 3.1: + +* Dense world + * New [`Ref<>`](http://eigen.tuxfamily.org/dox-devel/classEigen_1_1Ref.html) class allowing one to write non-templated functions taking various kinds of Eigen dense objects without copies. + * New [RealQZ](http://eigen.tuxfamily.org/dox-devel/classEigen_1_1RealQZ.html) factorization and [GeneralizedEigenSolver](http://eigen.tuxfamily.org/dox-devel/classEigen_1_1GeneralizedEigenSolver.html). + * Add vector-wise normalized and normalize functions, and hasNaN/allFinite members. + * Add mixed static/dynamic-size `.block<.,.>()` functions. + * Optimize outer products for non rank-1 update operations. + * Optimize diagonal products (enable vectorization in more cases). + * Improve robustness and performance in `JacobiSVD::solve()`. +* Sparse world + * New [SparseLU](http://eigen.tuxfamily.org/dox-devel/group__SparseLU__Module.html) module: built-in sparse LU with supernodes and numerical row pivoting (port of SuperLU making the SuperLUSupport module obsolete).
+ * New [SparseQR](http://eigen.tuxfamily.org/dox-devel/group__SparseQR__Module.html) module: rank-revealing sparse QR factorization with numerical column pivoting. + * New [COLAMD](http://eigen.tuxfamily.org/dox-devel/classEigen_1_1COLAMDOrdering.html) ordering and unified [ordering API](http://eigen.tuxfamily.org/dox-devel/group__OrderingMethods__Module.html). + * Add support for generic blocks of sparse matrices (read-only). + * Add conservative resize feature on sparse matrices. + * Add uniform support for solving sparse systems with sparse right hand sides. + * Add support for sparse matrix time sparse self-adjoint view products. + * Improve BiCGSTAB robustness with restart. +* Support to external libraries + * New [MetisSupport](http://eigen.tuxfamily.org/dox-devel/group__MetisSupport__Module.html) module: wrapper to the famous graph partitioning library. + * New [SPQRSupport](http://eigen.tuxfamily.org/dox-devel/group__SPQRSupport__Module.html) module: wrapper to suitesparse's supernodal QR solver. + +Eigen 3.2 represents about 600 commits since Eigen 3.1. + +## [3.2-rc2] - 2013-07-19 + +Changes since 3.2-rc1: + +* Rename `DenseBase::isFinite()` to `allFinite()` to avoid a future naming collision. +* Fix an ICE with ICC 11.1. + +## [3.2-rc1] - 2013-07-17 + +Main changes since 3.2-beta1: +* New features: + * #562: Add vector-wise normalized and normalize functions. + * #564: Add `hasNaN` and `isFinite` members. + * #579: Add support for mixed static/dynamic-size `.block()`. + * #588: Add support for determinant in SparseLU. + * Add support in SparseLU to solve with L and U factors independently. + * Allow multiplication-like binary operators to be applied on type combinations supported by `scalar_product_traits`. + * #596: Add conversion from `SparseQR::matrixQ()` to a `SparseMatrix`. + * #553: Add support for sparse matrix time sparse self-adjoint view products. 
+ +* Accuracy and performance: + * Improve BiCGSTAB robustness: fix a divide by zero and allow to restart with a new initial residual reference. + * #71: Enable vectorization of diagonal products in more cases. + * #620: Fix robustness and performance issues in JacobiSVD::solve. + * #609: Improve accuracy and consistency of the eulerAngles functions. + * #613: Fix accuracy of SSE sqrt for very small numbers. + * Enable SSE with ICC even when it mimics a gcc version lower than 4.2. + * Add SSE4 min/max for integers. + * #590 & #591: Minor improvements in NEON vectorization. + +* Bug fixes: + * Fix `HouseholderSequence::conjugate()` and `::adjoint()`. + * Fix SparseLU for dense matrices and matrices in non compressed mode. + * Fix `SparseMatrix::conservativeResize()` when one dimension is null. + * Fix `transposeInPlace` for arrays. + * Fix `handmade_aligned_realloc`. + * #554: Fix detection of the presence of `posix_memalign` with mingw. + * #556: Workaround mingw bug with `-O3` or `-fipa-cp-clone` options. + * #608: Fix sign computation in LDLT. + * #567: Fix iterative solvers to immediately return when the initial guess is the true solution and for trivial solution. + * #607: Fix support for implicit transposition from dense to sparse vectors. + * #611: Fix support for products of the form `diagonal_matrix * sparse_matrix * diagonal_matrix`. + +* Others: + * #583: Add compile-time assertion to check DenseIndex is signed. + * #63: Add lapack unit tests. They are automatically downloaded and configured if `EIGEN_ENABLE_LAPACK_TESTS` is ON. + * #563: Assignment to `Block` is now allowed on non-compressed matrices. + * #626: Add assertion on input ranges for coeff* and insert members for sparse objects. + * #314: Move special math functions from internal to numext namespace. + * Fix many warnings and compilation issues with recent compiler versions. 
+ * Many other fixes including #230, #482, #542, #561, #564, #565, #566, #578, #581, #595, #597, #598, #599, #605, #606, #615. + +## [3.1.3] - 2013-04-16 + +Changes since 3.1.2: + +* #526 - Fix linear vectorized transversal in linspace. +* #551 - Fix compilation issue when using `EIGEN_DEFAULT_DENSE_INDEX_TYPE`. +* #533 - Fix some missing const qualifiers in Transpose +* Fix a compilation with CGAL::Gmpq by adding explicit internal:: namespace when calling `abs()`. +* Fix computation of outer-stride when calling `.real()` or `.imag()`. +* Fix `handmade_aligned_realloc` (affected `conservativeResize()`). +* Fix sparse vector assignment from a sparse matrix. +* Fix `log(0)` with SSE. +* Fix bug in aligned_free with windows CE. +* Fix traits of `Map`. +* Fix a few warnings (#507, #535, #581). +* Enable SSE with ICC even when it mimics a gcc version lower than 4.2 +* Workaround [gcc-4.7 bug #53900](http://gcc.gnu.org/bugzilla/show_bug.cgi?id=53900) (too aggressive optimization in our alignment check) + +## [3.2-beta1] - 2013-03-07 + +Main changes since 3.1: + +* Dense modules + * A new [`Ref<>`](http://eigen.tuxfamily.org/dox-devel/classEigen_1_1Ref.html) class allowing to write non templated function taking various kind of Eigen dense objects without copies. + * New [RealQZ](http://eigen.tuxfamily.org/dox-devel/classEigen_1_1RealQZ.html) factorization and [GeneralizedEigenSolver](http://eigen.tuxfamily.org/dox-devel/classEigen_1_1GeneralizedEigenSolver.html) + * Optimized outer products for non rank-1 update operations. + +* Sparse modules + * New [SparseLU](http://eigen.tuxfamily.org/dox-devel/group__SparseLU__Module.html) module: built-in sparse LU with supernodes and numerical row pivoting (port of SuperLU making the SuperLUSupport module obsolete). + * New [SparseQR](http://eigen.tuxfamily.org/dox-devel/group__SparseQR__Module.html) module: rank-revealing sparse QR factorization with numerical column pivoting. 
+ * OrderingMethods: extended with [COLAMD](http://eigen.tuxfamily.org/dox-devel/classEigen_1_1COLAMDOrdering.html) ordering and a unified [ordering](http://eigen.tuxfamily.org/dox-devel/group__OrderingMethods__Module.html) API. + * Support for generic blocks of sparse matrices. + * Add conservative resize feature on sparse matrices. + * Add uniform support for solving sparse systems with sparse right hand sides. + +* Support to external libraries + * New [MetisSupport](http://eigen.tuxfamily.org/dox-devel/group__MetisSupport__Module.html) module: wrapper to the famous graph partitioning library. + * New [SPQRSupport](http://eigen.tuxfamily.org/dox-devel/group__SPQRSupport__Module.html) module: wrapper to suitesparse's supernodal QR solver. + +* Misc + * Improved presentation and clarity of Doxygen generated documentation (modules are now organized into chapters, treeview panel and search engine for quick navigation). + * New compilation token `EIGEN_INITIALIZE_MATRICES_BY_NAN` to help debugging. + * All bug fixes of the 3.1 branch, plus a couple of other fixes (including 211, 479, 496, 508, 552) + +## [3.1.2] - 2012-11-05 + +Changes since 3.1.1: + +* #524 - Pardiso's parameter array does not have to be aligned! +* #521 - Disable `__cpuidex` on architectures different than x86 or x86-64 with MSVC. +* #519 - `AlignedBox::dim()` was wrong for dynamic dimensions. +* #515 - Fix missing explicit scalar conversion. +* #511 - Fix pretty printers on windows. +* #509 - Fix warnings with gcc 4.7 +* #501 - Remove aggressive mat/scalar optimization (was replaced by `mat*(1/scalar)` for non integer types). +* #479 - Use EISPACK's strategy re max number of iters in Schur decomposition. +* Add support for scalar multiple of diagonal matrices. +* Forward `resize()` function from Array/Matrix wrappers to the nested expression such that `mat.array().resize(a,b)` is now allowed. +* Windows CE: fix the lack of the `aligned_malloc` function on this platform. 
+* Fix comma initializer when inserting empty matrices. +* Fix `dense=sparse*diagonal` products. +* Fix compilation with `m.array().min(scalar)` and `m.array().max(scalar)`. +* Fix out-of-range memory access in GEMV (the memory was not used for the computation, only to assemble unaligned packets from aligned packet loads). +* Fix various regressions with MKL support. +* Fix aliasing issue in sparse matrix assignment. +* Remove stupid assert in blue norm. +* Workaround a weird compilation error with MSVC. + +## [3.1.1] - 2012-07-22 + +Changes since 3.1.0: +* [relicense to MPL2](https://www.eigen.tuxfamily.org/index.php?title=Main_Page#License) +* add a `EIGEN_MPL2_ONLY` build option to generate compiler errors when including non-MPL2 modules +* remove dynamic allocation for triangular matrix-matrix products of fixed size objects +* Fix possible underflow issues in SelfAdjointEigenSolver +* Fix issues with fixed-size Diagonal (sub/super diagonal size computation was wrong) +* #487 - Geometry module: `isometry * scaling` compilation error +* #486 - MKL support: fixed multiple-references linker errors with various decompositions +* #480 - work around compilation error on Android NDK due to isfinite being defined as a macro +* #485 - IterativeLinearSolvers: conflict between a typedef and template type parameter +* #479 - Eigenvalues/Schur: Adjust max iterations count to matrix size +* Fixed Geometry module compilation under MSVC +* Fixed Sparse module compilation under MSVC 2005 + +## [3.0.6] - 2012-07-09 + +Changes since 3.0.5: +* #447 - fix infinite recursion in `ProductBase::coeff()` +* #478 - fix RealSchur on a zero matrix +* #477 - fix warnings with gcc 4.7 +* #475 - `.exp()` now returns `+inf` when overflow occurs (SSE) +* #466 - fix a possible race condition in OpenMP environment (for non OpenMP thread model it is recommended to upgrade to 3.1) +* #362 - fix missing specialization for affine-compact `*` projective +* #451 - fix a clang warning +* Fix compilation 
of `somedensematrix.llt().matrixL().transpose()` +* Fix misuse of the cost-model in Replicate +* Fix use of int versus Index types for `Block::m_outerStride` +* Fix ambiguous calls to some std functions +* Fix geometry tutorial on scalings +* Fix RVCT 3.1 compiler errors +* Fix implicit scalar conversion in Transform +* Fix typo in NumericalDiff (unsupported module) +* Fix LevenbergMarquardt for non double scalar type (unsupported module) + +## [3.1.0] - 2012-06-24 + +Major changes between Eigen 3.0 and Eigen 3.1: +* New features + * **New set of officially supported Sparse Modules** + ** This includes sparse matrix storage, assembly, and many built-in (Cholesky, CG, BiCGSTAB, ILU), and third-party (PaStiX, Cholmod, UmfPack, SuperLU, Pardiso) solvers + ** See this [page](http://eigen.tuxfamily.org/dox-devel/TutorialSparse.html) for an overview of the features + * **Optional support for Intel MKL** + ** This includes the BLAS, LAPACK, VML, and Pardiso components + ** See this [page](http://eigen.tuxfamily.org/dox-devel/TopicUsingIntelMKL.html) for the details + * Core + ** New vector-wise operators: `*`, `/`, `*=`, `/=` + ** New coefficient-wise operators: `&&`, `||`, `min(Scalar)`, `max(Scalar)`, `pow`, `operator/(Scalar,ArrayBase)` + * Decompositions + ** Add incremental rank-updates in LLT and LDLT + ** New `SelfAdjointEigenSolver::computeDirect()` function for fast eigen-decomposition through closed-form formulas (only for 2x2 and 3x3 real matrices) +* Optimizations + * Memory optimizations in JacobiSVD and triangular solves. + * Optimization of reductions via partial unrolling (e.g., dot, sum, norm, etc.) + * Improved performance of small matrix-matrix products and some Transform<> operations + +Eigen 3.1 represents about 600 commits since Eigen 3.0. 
+ +## [3.1.0-rc2] - 2012-06-21 + +Changes since 3.1.0-rc1: +* Fix a couple of compilation warnings +* Improved documentation, in particular regarding the Geometry and Sparse tutorials, and sparse solver modules +* Fix double preconditioner allocation in `JacobiSVD` +* #466: `RealSchur` failed on a zero matrix +* Update Adolc and MPReal support modules + +## [3.1.0-rc1] - 2012-06-14 + +Main changes since 3.1.0-beta1: +* #466: fix a possible race condition issue. From now on, multithreaded applications that call Eigen from multiple threads must initialize Eigen by calling `initParallel()`. +* For consistency, `SimplicialLLT` and `SimplicialLDLT` now factorize `P A P^-1` (instead of `P^-1 A P`). +* #475: now the vectorized `exp` operator returns +inf when overflow occurs +* Fix the use of MKL with MSVC by disabling MKL's pow functions. +* Avoid dynamic allocation for fixed size triangular solving +* Fix a compilation issue with ICC 11.1 +* Fix ambiguous calls in the math functors +* Fix BTL interface. + +## [3.1.0-beta1] - 2012-06-07 + +Main changes since 3.1.0-alpha2: +* **API changes** + * `SimplicialLLt` and `SimplicialLDLt` are now renamed `SimplicialLLT` and `SimplicialLDLT` for consistency with the other modules. + * The Pardiso support module is now spelled "PardisoSupport" +* Dense modules: + * Add `operator/(Scalar,ArrayBase)` and coefficient-wise pow operator. + * Fix automatic evaluation of expressions nested by Replicate (performance improvement) + * #447 - fix infinite recursion in `ProductBase::coeff()` + * #455 - add support for c++11 in `aligned_allocator` + * `LinSpace`: add a missing variant, and fix the size=1 case +* Sparse modules: + * Add an **IncompleteLU** preconditioner with dual thresholding. 
+ * Add an interface to the parallel **Pastix** solver + * Improve applicability of permutations (add `SparseMatrixBase::twistedBy`, handle non symmetric permutations) + * `CholmodDecomposition` now has explicit variants: `CholmodSupernodalLLT`, `CholmodSimplicialLLT`, `CholmodSimplicialLDLT` + * Add analysePattern/factorize methods to iterative solvers + * Preserve explicit zero in a sparse assignment + * Speedup `sparse * dense` products + * Fix a couple of issues with Pardiso support +* Geometry module: + * Improve performance of some `Transform<>` operations by better preserving the alignment status. + * #415 - wrong return type in `Rotation2D::operator*=` + * #439 - add `Quaternion::FromTwoVectors()` static constructor + * #362 - missing specialization for affine-compact `*` projective +* Others: + * add support for RVCT 3.1 compiler + * New tutorial page on Map + * and many other bug fixes such as: #417, #419, #450 + +## [3.0.5] - 2012-02-10 + +Changes since 3.0.4: +* #417 - fix nesting of `Map` expressions +* #415 - fix return value of `Rotation2D::operator*=` +* #410 - fix a possible out of range access in `EigenSolver` +* #406 - fix infinite loop/deadlock when using OpenMP and Eigen +* Changeset 4462 - fix broken asserts revealed by Clang +* Changeset 4457 - fix description of `rankUpdate()` in quick reference guide +* Changeset 4455 - fix out-of-range int constant in 4x4 inverse +* #398 - fix in slerp: the returned quaternion was not always normalized +* Changeset 4432 - fix asserts in eigenvalue decompositions +* Changeset 4416 - fix MSVC integer overflow warning + +## [3.1.0-alpha2] - 2012-02-06 + +Main changes since 3.0.1-alpha1: +* New optional support for Intel MKL and other BLAS including: ([details](http://eigen.tuxfamily.org/dox-devel/TopicUsingIntelMKL.html)) + * BLAS (arbitrary BLAS) + * Intel LAPACKE + * Intel VML (coefficient-wise math operations) + * Intel PARDISO (sparse direct solver) +* Dense modules: + * improved performance of small 
matrix-matrix products + * Feature #319 - add a rankUpdate function to LDLt and LLT for updates/downdates + * Feature #400 - new coefficient wise min/max functions taking one scalar argument +* Sparse modules: + * new fast sparse matrix assembly interface from a random list of triplets (see `SparseMatrix::setFromTriplets()`) + * new shifting feature in SimplicialCholesky (see `SimplicialCholeskyBase::setShift()`) + * add checks for positive definiteness in SimplicialCholesky + * improved heuristic to predict the nnz of a `sparse*sparse` product + * add support for uncompressed SparseMatrix in CholmodSupport +* Geometry module: + * Feature #297 - add `ParametrizedLine::intersectionPoint()` and `intersectionParam()` functions +* Others: + * fix many warnings and compilation issues with ICC 12 and -strict-ansi + * fix some ICE with MSVC10 + * add the possibility to disable calls to cpuid (`-DEIGEN_NO_CPUID`) and other asm directives + * and many other bug fixes such as: #406, #410, #398, #396, #394, #354, #352, #301, + + +## [3.1.0-alpha1] - 2011-12-06 + +Main changes since 3.0: +* Officially supported set of sparse modules. See this [page](http://eigen.tuxfamily.org/dox-devel/TutorialSparse.html) for an overview of the features. 
Main changes: + * new `SparseCore` module equivalent to the old `Sparse` module, the `Sparse` module is now a super module including all sparse-related modules + * the `SparseMatrix` class is now more versatile and supports an uncompressed mode for fast element insertion + * the `SparseMatrix` class now offers a unique and simplified API to insert elements + * `DynamicSparseMatrix` has been deprecated (moved into `unsupported/SparseExtra`) + * new conservative `sparse * sparse` matrix product which is also used by default + * new `SparseCholesky` module featuring the SimplicialLLT and SimplicialLDLT built-in solvers + * new `IterativeLinearSolvers` module featuring a conjugate gradient and stabilized bi-conjugate gradient iterative solvers with a basic Jacobi preconditioner +* New `SelfAdjointEigenSolver::computeDirect()` function for fast eigen-decomposition through closed-form formulas (only for 2x2 and 3x3 real matrices) +* New `LLT::rankUpdate()` function supporting both updates and down-dates +* Optimization of reduction via partial unrolling (e.g., dot, sum, norm, etc.) +* New coefficient-wise operators: `&&` and `||` +* Feature #157 - New vector-wise operations for arrays: `*`, `/`, `*=`, and `/=`. 
+* Feature #206 - Pre-allocation of intermediate buffers in JacobiSVD +* Feature #370 - New typedefs for AlignedBox +* All the fixes and improvements of the 3.0 branch up to the 3.0.4 release (see below) + + +## [3.0.4] - 2011-12-06 + +Changes since 3.0.3: + +* #363 - check for integer overflow in size computations +* #369 - Quaternion alignment is broken (and more alignment fixes) +* #354 - Converge better in SelfAdjointEigenSolver, and allow better handling of non-convergent cases +* #347 - Fix compilation on ARM NEON with LLVM 3.0 and iOS SDK 5.0 +* #372 - Put unsupported modules documentation at the right place +* #383 - Fix C++11 compilation problem due to some constructs mis-interpreted as c++11 user-defined literals +* #373 - Compilation error with clang 2.9 when exceptions are disabled +* Fix compilation issue with `QuaternionBase::cast` + + +## [2.0.17] - 2011-12-06 + +Changes since 2.0.16: + +* Fix a compilation bug in `aligned_allocator`: the allocate method should take a void pointer +* Fix a typo in ParametrizedLine documentation + + +## [3.0.3] - 2011-10-06 + +Changes since 3.0.2: + +* Fix compilation errors when Eigen2 support is enabled. +* Fix bug in evaluating expressions of the form `matrix1 * matrix2 * scalar1 * scalar2`. +* Fix solve using LDLT for singular matrices if solution exists. +* Fix infinite loop when computing SVD of some matrices with very small numbers. +* Allow user to specify pkgconfig destination. +* Several improvements to the documentation. + + +## [3.0.2] - 2011-08-26 + +Changes since 3.0.1: + +* `Windows.h`: protect min/max calls from macros having the same name (no need to `#undef` min/max anymore). +* MinGW: fix compilation issues and pretty gdb printer. +* Standard compliance: fix aligned_allocator and remove uses of long long. +* MPReal: updates for the new version. +* Other fixes: + * fix aligned_stack_memory_handler for null pointers. + * fix std::vector support with gcc 4.6. + * fix linking issue with OpenGL support. 
+ * fix SelfAdjointEigenSolver for 1x1 matrices. + * fix a couple of warnings with new compilers. + * fix a few documentation issues. + + +## [3.0.1] - 2011-05-30 + +Changes since 3.0.0: + +* Fix many bugs regarding ARM and NEON (Now all tests succeed on ARM/NEON). +* Fix compilation on gcc 4.6 +* Improved support for custom scalar types: + * Fix memory leak issue for scalar types throwing exceptions. + * Fix implicit scalar type conversion. + * Math functions can be defined in the scalar type's namespace. +* Fix bug in trapezoidal matrix times matrix product. +* Fix asin. +* Fix compilation with MSVC 2005 (SSE was wrongly enabled). +* Fix bug in `EigenSolver`: normalize the eigen vectors. +* Fix Qt support in Transform. +* Improved documentation. + +## [2.0.16] - 2011-05-28 + +Changes since 2.0.15: + +* Fix bug in 3x3 tridiagonalisation (and consequently in 3x3 selfadjoint eigen decomposition). +* Fix compilation for new gcc 4.6. +* Fix performance regression since 2.0.12: in some matrix-vector product, complex matrix expressions were not pre-evaluated. +* Fix documentation of Least-Square. +* New feature: support for `part`. +* Fix bug in SparseLU::setOrderingMethod. + +## [3.0.0] - 2011-03-19 + +Released at the [meeting](https://www.eigen.tuxfamily.org/index.php?title=Paris_2011_Meeting). + +See the [Eigen 3.0 release notes](https://www.eigen.tuxfamily.org/index.php?title=3.0). + +Only change since 3.0-rc1: +* Fixed compilation of the unsupported 'openglsupport' test. + +## [3.0-rc1] - 2011-03-14 + +Main changes since 3.0-beta4: + +* Core: added new `EIGEN_RUNTIME_NO_MALLOC` option and new `set_is_malloc_allowed()` option to finely control where dynamic memory allocation is allowed. Useful for unit-testing of functions that must not cause dynamic memory allocations. +* Core: SSE performance fixes (follow-up from #203). +* Core: Fixed crashes when using `EIGEN_DONT_ALIGN` or `EIGEN_DONT_ALIGN_STATICALLY` (#213 and friends). 
+* Core: `EIGEN_DONT_ALIGN` and `EIGEN_DONT_ALIGN_STATICALLY` are now covered by unit tests. +* Geometry: Fixed transform * matrix products (#207). +* Geometry: compilation fix for mixing CompactAffine with Homogeneous objects +* Geometry: compilation fix for 1D transform +* SVD: fix non-computing constructors (correctly forward `computationOptions`) (#206) +* Sparse: fix resizing when the destination sparse matrix is row major (#37) +* more Eigen2Support improvements +* more unit test fixes/improvements +* more documentation improvements +* more compiler warnings fixes +* fixed GDB pretty-printer for dynamic-size matrices (#210) + +## [3.0-beta4] - 2011-02-28 + +Main changes since 3.0-beta3: + +* Non-vectorization bug fixes: + * fix #89: work around an extremely evil compiler bug on old GCC (<= 4.3) with the standard `assert()` macro + * fix Umfpack back-end in the complex case +* Vectorization bug fixes: + * fix a segfault in "slice vectorization" when the destination might not be aligned on a scalar (`complex`) + * fix #195: fast SSE unaligned loads fail on GCC/i386 and on Clang + * fix #186: worked around a GCC 4.3 i386 backend issue with SSE + * fix #203: SSE: a workaround used in pset1() resulted in poor assembly + * worked around a GCC 4.2.4 internal compiler error with vectorization of complex numbers + * lots of AltiVec compilation fixes + * NEON compilation fixes +* API additions and error messages improvements + * Transform: prevent bad user code from compiling + * fix #190: directly pass Transform Options to Matrix, allowing to use RowMajor. Fix issues in Transform with non-default Options. 
+ * factorize implementation of standard real unary math functions, and add acos, asin +* Build/tests system + * Lots of unit test improvements + * fix installation of unsupported modules + * fixed many compiler warnings, especially on the Intel compiler and on LLVM/Clang + * CTest/CMake improvements + * added option to build in 32bit mode +* BLAS/LAPACK implementation improvements + * The Blas library and tests are now automatically built as part of the tests. + * expanded LAPACK interface (including syev) + * now Sparse solver backends use our own BLAS/LAPACK implementation + * fix #189 (cblat1 test failure) +* Documentation + * improved conservativeResize methods documentation + * documented sorting of eigenvalues + * misc documentation improvements + * improve documentation of plugins + +## [3.0-beta3] - 2011-02-12 + +The biggest news is that the API is now **100% stable**. + +Main changes since 3.0-beta2: + +* The "too many to list them all" category: + * lots of bug fixes + * lots of performance fixes + * lots of compiler support fixes + * lots of warning fixes + * lots of unit tests improvements and fixes + * lots of documentation improvements + * lots of build system fixes +* API changes: + * replaced `ei_` prefix by `internal::` namespace. For example, `ei_cos(x)` becomes `internal::cos(x)`. + * renamed `PlanarRotation` -> `JacobiRotation` + * renamed `DenseStorageBase` -> `PlainObjectBase` + * HouseholderSequence API cleanup + * refactored internal metaprogramming helpers to follow closely the standard library + * made UpperBidiagonalization internal + * made BandMatrix/TridiagonalMatrix internal + * Core: also see below, "const correctness". + * Sparse: `EIGEN_YES_I_KNOW_SPARSE_MODULE_IS_NOT_STABLE_YET` must be defined to use Eigen/Sparse + * Core: `random()` now spans over range of width `RAND_MAX` +* New API: + * Core: added Map static methods taking strides + * SVD: added `jacobiSvd()` method + * Sparse: many misc improvements and new features. 
Improved support for Cholmod, Amd, SuperLU and other back-ends. + * Core: allow mixed real-complex dot products + * Geometry: allow mixed real-complex cross products + * Geometry: allow to pass Options parameters to Transform, Quaternion and other templates, to control memory alignment + * QR: add threshold API to FullPivHouseholderQR + * Core: added tan function +* Const correctness: + * Eigen now properly enforces const-correctness everywhere, for example with Map objects. This will break compilation of code that relied on former behavior. + * A new kind of test suite was added to check that, 'failtest'. +* BLAS/LAPACK: + * Complete BLAS library built on top of Eigen. Imported BLAS test suite, which allowed to fix many issues. + * Partial LAPACK implementation. Passing part of the LAPACK test suite, which also allowed to fix some issues. +* Eigen 2 Support: + * tons of improvements in `EIGEN2_SUPPORT` + * new incremental migration path: see http://eigen.tuxfamily.org/dox-devel/Eigen2SupportModes.html + * imported a copy of the Eigen 2 test suite, made sure that Eigen 3 passes it. That also allowed to fix several issues. + + +## [3.0-beta2] - 2010-10-15 + +Main changes since 3.0-beta1: + +* Add support for the vectorization of `std::complex<>` with SSE, AltiVec and NEON. +* Add support for mixed `real * complex` matrix products with vectorization. +* Finalize the JacobiSVD class with: compile time options, thin/full decompositions, and least-square solving. +* Several improvement of the Transform class. In particular, there is no default mode anymore. +* New methods: `middleRows()`, `middleCols()`, `TriangularMatrix::conjugate()` +* New unsupported modules: OpenGL, MPFR C++ +* Many improvements in the support of empty objects. +* Many improvements of the vectorization logic. +* Add the possibility to extend QuaternionBase. +* Vectorize Quaternion multiplication with double. +* Significant improvements of the documentation. +* Improved compile time errors. 
+* Enforce static allocation of temporary buffers in gemm (when possible). +* Fix aligned_delete for null pointers and non trivial dtors. +* Fix eigen decomposition of 3x3 float matrices. +* Fix 4x4 matrix inversions (vectorization). +* Many fixes in QR: solving with `m>n`, use of rank, etc. +* Fixes for MSVC for windows mobile and CLang. +* Remove the Taucs backend (obsolete). +* Remove the old SVD class (was causing too much trouble, a new decomposition based on bidiagonalisation/householder should come back soon, `JacobiSVD` can be used meanwhile). + +## [2.0.15] - 2010-07-16 + +Changes since 2.0.14: + +* Fix bug: certain cases of matrix-vector product (depending on storage order) were blocked by an assertion failure. +* Fix LU and QR solve when rank==0, fix LLT when the matrix is purely 0. +* Fix a couple of bugs with QR solving especially with rows>cols. +* Fix bug with custom scalar types that have non-trivial destructor. +* Fix for ICC in SSE code. +* Fix some C++ issues found by Clang (patch by Nick Lewycky). + +## [3.0-beta1] - 2010-07-05 + +See the [announcement](https://www.eigen.tuxfamily.org/index.php?title=3.0). + +## [2.0.14] - 2010-06-22 + +Changes since 2.0.13: + +* Fix #141: crash in SSE (alignment problem) when using dynamic-size matrices with a max-size fixed at compile time that is not a multiple of 16 bytes. For example, `Matrix`. +* Fix #142: LU of fixed-size matrices was causing dynamic memory allocation (patch by Stuart Glaser). +* Fix #127: remove useless static keywords (also fixes warnings with clang++). + +## [2.0.13] - 2010-06-10 + +Changes since 2.0.12: + +* Fix #132: crash in certain matrix-vector products. Unit test added. +* Fix #125: colwise `norm()` and `squaredNorm()` on complex types do not return real types +* Fully support the QCC/QNX compiler (thanks to Piotr Trojanek). The support in 2.0.12 was incomplete. The whole test suite is now successful. 
+* As part of the QCC support work, a lot of standards compliance work: put `std::` in front of a lot of things such as `size_t`, check whether the math library needs to be linked to explicitly. +* Fix precision issues in LDLT. The `isPositiveDefinite()` method is now always returning true, but it was conceptually broken anyway, since a non-pivoting LDLT decomposition can't know that. +* Compilation fix in `ldlt()` on expressions. +* Actually install the Eigen/Eigen and Eigen/Dense public headers! +* Fix readcost for complex types. +* Fix compilation of the BTL benchmarks. +* Some dox updates. + +## [2.0.12] - 2010-02-12 + +Changes since 2.0.11: + +* `EIGEN_DEFAULT_TO_ROW_MAJOR` is fully supported and tested. +* Several important fixes for row-major matrices. +* Fix support of several algorithms for mixed fixed-dynamic size matrices where the fixed dimension is greater than the dynamic dimension. For example: `Matrix(3,2)` +* fix `EIGEN_DONT_ALIGN`: now it _really_ disables vectorization (was giving a `#error` unless you also used `EIGEN_DONT_VECTORIZE`). +* Fix #92: Support QNX's QCC compiler (patch by Piotr Trojanek) +* Fix #90, missing type cast in LU, allow to use LU with MPFR (patch by 'Wolf'). +* Fix ICC compiler support: work around a bug present at least in ICC 11.1. +* Compilation fixes for `computeInverse()` on expressions. +* Fix a gap in a unit-test (thanks to Jitse Niesen) +* Backport improvements to benchmarking code. +* Documentation fixes + +## [2.0.11] - 2010-01-10 + +Changes since 2.0.10: + +* Complete rewrite of the 4x4 matrix inversion: we now use the usual cofactors approach, so no numerical stability problems anymore (bug #70) +* Still 4x4 matrix inverse: SSE path for the float case, borrowing code by Intel, giving very high performance. +* Fix crash happening on 32-bit x86 Linux with SSE, when double's were created at non-8-byte-aligned locations (bug #79). +* Fix bug in Part making it crash in certain products (bug #80). 
+* Precision improvements in Quaternion SLERP (bug #71). +* Fix sparse triangular solver for lower/row-major matrices (bug #74). +* Fix MSVC 2010 compatibility. +* Some documentation improvements. + +## [2.0.10] - 2009-11-25 + +Changes since 2.0.9: + +* Rewrite 4x4 matrix inverse to improve precision, and add a new unit test to guarantee that precision. It's less fast, but it's still faster than the cofactors method. +* Fix bug #62: crash in SSE code with MSVC 2008 (Thanks to Hauke Heibel). +* Fix bug #65: `MatrixBase::nonZeros()` was recursing infinitely +* Fix PowerPC platform detection on Mac OSX. +* Prevent the construction of bogus MatrixBase objects and generate good compilation errors for that. Done by making the default constructor protected, and adding some private constructors. +* Add option to initialize all matrices by zero: just #define `EIGEN_INITIALIZE_MATRICES_BY_ZERO` +* Improve Map documentation +* Install the pkg-config file to share/pkgconfig, instead of lib/pkgconfig (thanks to Thomas Capricelli) +* fix warnings +* fix compilation with MSVC 2010 +* adjust to repository name change + +## [2.0.9] - 2009-10-24 + +Changes since 2.0.8: + +* Really fix installation and the pkg-config file. +* Install the `NewStdVector` header that was introduced in 2.0.6. + +## [2.0.8] - 2009-10-23 + +Changes since 2.0.7: + +* fix installation error introduced in 2.0.7: it was choking on the pkg-config file eigen2.pc not being found. The fix had been proposed long ago by Ingmar Vanhassel for the development branch, and when recently the pkg-config support was back-ported to the 2.0 branch, nobody thought of backporting this fix too, and apparently nobody tested "make install" ! +* SVD: add default constructor. Users were relying on the compiler to generate one, and apparently 2.0.7 triggered a little MSVC 2008 subtlety in this respect. Also added an assert. 
+ +## [2.0.7] - 2009-10-22 + +Changes since 2.0.6: + +* fix bug #61: crash when using Qt `QVector` on Windows 32-bit. By Hauke Heibel. +* fix bug #10: the `reallocateSparse` function was half coded +* fix bug in `SparseMatrix::resize()` not correctly initializing by zero +* fix another bug in `SparseMatrix::resize()` when `outerSize==0`. By Hauke Heibel. +* fully support GCC 3.3. It was working in 2.0.2, there was a compilation error in 2.0.6, now for the first time in 2.0.7 it's 100% tested (the test suite passes without any errors, warnings, or failed tests). +* SVD: add missing assert (help catch mistakes) +* fixed warnings in unit-tests (Hauke Heibel) +* finish syncing `Memory.h` with the devel branch. This is cleaner and fixes a warning. The choice of system aligned malloc function may be affected by this change. +* add pkg-config support by Rhys Ulerich. +* documentation fix and doc-generation-script updates by Thomas Capricelli + +## [2.0.6] - 2009-09-23 + +Changes since 2.0.5: + +* fix bug: visitors didn't work on row-vectors. +* fix bug #50: compilation errors with `swap()`. +* fix bug #42: Add `Transform::Identity()` as mentioned in the tutorial. +* allow to disable all alignment code by defining `EIGEN_DONT_ALIGN` (backport from devel branch). +* backport the devel branch's `StdVector` header as `NewStdVector`. You may also #define `EIGEN_USE_NEW_STDVECTOR` to make `StdVector` use it automatically. However, by default it isn't used by `StdVector`, to preserve compatibility. +* Vectorized quaternion product (for float) by Rohit Garg (backport from devel branch). +* allow to override `EIGEN_RESTRICT` and add `EIGEN_DONT_USE_RESTRICT_KEYWORD` +* fix a warning in `ei_aligned_malloc`; fixed by backporting the body from the devel branch; may result in a different choice of system aligned malloc function. +* update the documentation. 
+ +## [2.0.5] - 2009-08-22 + +Changes since 2.0.4: + +* fix bug: in rare situations involving mixed storage orders, a matrix product could be evaluated as its own transpose +* fix bug: `LU::solve()` crashed when called on the LU decomposition of a zero matrix +* fix bug: `EIGEN_STACK_ALLOCATION_LIMIT` was too high, resulting in stack overflow for a user. Now it is set as in the devel branch. +* fix compilation bug: our `StdVector` header didn't work with GCC 4.1. (Bug #41) +* fix compilation bug: missing return statement in `Rotation2D::operator*=` (Bug #36) +* in StdVector, a more useful `#error` message about the #including order +* add `EIGEN_TRANSFORM_PLUGIN` allowing to customize the Transform class +* fix a warning with MSVC +* fix a bug in our cmake code when building unit-tests (thanks to Marcus Hanwell) +* work around a bug in cmake that made it fail to build unit-tests when fortran wasn't installed +* in our cmake code, remove the part about retrieving the mercurial info and appending it to the version number in the dox +* dox: remove the old example list +* fix the option to build a binary library, although it's not very useful and will be removed +* add basic .hgignore file and script to build the docs (thanks to Thomas Capricelli) + +## [2.0.4] - 2009-08-01 + +Changes since 2.0.3: +* Several fixes in the overloaded new and delete operators. Thanks to Hauke Heibel. +* compilation fix: add the missing `ei_atan2` function. Thanks to Manuel Yguel. +* Use `ei_atan2` instead of using `std::atan2` directly. +* several compilation fixes in the Qt interoperability code: methods `toQTransform()` and `toQMatrix()`. Thanks to Anthony Truchet. +* compilation fix and simplification in Matrix assignment +* compilation fixes in `a *= b` and `a = a*b` when a has to be resized. +* remove a "stupid" version of `ei_pow`. 
for integers for gcc >= 4.3 +* bug fix in `Quaternion::setFromTwoVectors()` +* several ctest improvements: use our own dashboard, use a separate project for the 2.0 branch. +* documentation: improvement on the pages on unaligned arrays (the online copies have been updated immediately). + +## [2.0.3] - 2009-06-21 + +Changes since 2.0.2: +* precision and reliability fixes in various algorithms, especially LLT, QR, Tridiagonalization, and also a precision improvement in LU. +* fix LLT and LDLT solve() on uninitialized result (was causing an assertion). +* add Eigen/Eigen and Eigen/Dense headers for convenience +* document a newly found cause for the "unaligned array" assertion +* backport documentation improvements on transpose() and adjoint() +* updates in the Sparse module (was needed to support KDE 4.3) + +## [2.0.2] - 2009-05-22 + +Changes since 2.0.1: +* Fix `linearRegression()` compilation, actually it is reimplemented using the better fitHyperplane() which does total least-squares. +* Add missing `setZero()` etc... variants taking size parameters and resizing. These were mentioned in the tutorial but weren't implemented. +* Fix `posix_memalign` platform check. This fixes portability issues. Thanks to Ross Smith. +* Fix detection of SSE2 on the Windows 64-bit platform. +* Fix compatibility with the old GCC 3.3: it is now fully supported again. +* Fix warnings with recent GCC (4.4.0 and 4.3.3). + +## [2.0.1] - 2009-04-14 + +Changes since 2.0.0: +* disable alignment altogether on exotic platforms on which we don't vectorize anyway. This allows e.g. to use Eigen on ARM platforms. +* new StdVector header with a new workaround for the problems with std::vector. 
+* workarounds for MSVC internal compiler errors +* MSVC 9 compilation fix (patch by Hauke Heibel) +* fixes for various bugs in Maps/Blocks that could give wrong results +* fix bug in 4x4 matrix inverse that could give wrong results +* compilation fix in SliceVectorization +* fix wrong static assertion (patch by Markus Moll) +* add missing operators in `aligned_allocator` (thanks to Hauke Heibel) + +## [2.0.0] - 2009-02-02 + +First public release. \ No newline at end of file diff --git a/o-voxel/third_party/eigen/CMakeLists.txt b/o-voxel/third_party/eigen/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..1e7b0fafc0b8b333b7f557c6ab2e51b66d412360 --- /dev/null +++ b/o-voxel/third_party/eigen/CMakeLists.txt @@ -0,0 +1,874 @@ +cmake_minimum_required(VERSION 3.10.0) + +#============================================================================== +# CMake Policy issues. +#============================================================================== +# Allow overriding options in a parent project via `set` before including Eigen. +if (POLICY CMP0077) + cmake_policy (SET CMP0077 NEW) +endif (POLICY CMP0077) + +# NOTE Remove setting the policy once the minimum required CMake version is +# increased to at least 3.15. Retain enabling the export to package registry. +if (POLICY CMP0090) + # The export command does not populate package registry by default + cmake_policy (SET CMP0090 NEW) + # Unless otherwise specified, always export to package registry to ensure + # backwards compatibility. + if (NOT DEFINED CMAKE_EXPORT_PACKAGE_REGISTRY) + set (CMAKE_EXPORT_PACKAGE_REGISTRY ON) + endif (NOT DEFINED CMAKE_EXPORT_PACKAGE_REGISTRY) +endif (POLICY CMP0090) + +# Disable warning about find_package(CUDA). +# CUDA language support is lacking for clang as the CUDA compiler +# until at least cmake version 3.18. Even then, there seems to be +# issues on Windows+Ninja in passing build flags. Continue using +# the "old" way for now. 
+if (POLICY CMP0146) + cmake_policy(SET CMP0146 OLD) +endif () + +# Normalize DESTINATION paths +if (POLICY CMP0177) + cmake_policy(SET CMP0177 NEW) +endif () + +# Respect _ROOT variables. +if (POLICY CMP0074) + cmake_policy(SET CMP0074 NEW) +endif () + +#============================================================================== +# CMake Project. +#============================================================================== + +project(Eigen3) + +# Remove this block after bumping CMake to v3.21.0 +# PROJECT_IS_TOP_LEVEL is defined then by default +if(CMAKE_VERSION VERSION_LESS 3.21.0) + if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) + set(PROJECT_IS_TOP_LEVEL ON) + else() + set(PROJECT_IS_TOP_LEVEL OFF) + endif() +endif() + +#============================================================================== +# Build ON/OFF Settings. +#============================================================================== +# Determine if we should build tests. +include(CMakeDependentOption) +cmake_dependent_option(BUILD_TESTING "Enable creation of tests." ON "PROJECT_IS_TOP_LEVEL" OFF) +option(EIGEN_BUILD_TESTING "Enable creation of Eigen tests." ${BUILD_TESTING}) +option(EIGEN_LEAVE_TEST_IN_ALL_TARGET "Leaves tests in the all target, needed by ctest for automatic building." OFF) + +# Determine if we should build BLAS/LAPACK implementations. +option(EIGEN_BUILD_BLAS "Toggles the building of the Eigen Blas library" ${PROJECT_IS_TOP_LEVEL}) +option(EIGEN_BUILD_LAPACK "Toggles the building of the included Eigen LAPACK library" ${PROJECT_IS_TOP_LEVEL}) +if (EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK) + # Determine if we should build shared libraries for BLAS/LAPACK on this platform. 
+ if (NOT EIGEN_BUILD_SHARED_LIBS) + get_cmake_property(EIGEN_BUILD_SHARED_LIBS TARGET_SUPPORTS_SHARED_LIBS) + endif() +endif() + +option(EIGEN_BUILD_BTL "Build benchmark suite" OFF) +option(EIGEN_BUILD_SPBENCH "Build sparse benchmark suite" OFF) +option(EIGEN_BUILD_AOCL_BENCH "Build AOCL benchmark" OFF) +# Avoid building docs if included from another project. +# Building documentation requires creating and running executables on the host +# platform. We shouldn't do this if cross-compiling. +if (PROJECT_IS_TOP_LEVEL AND NOT CMAKE_CROSSCOMPILING) + set(EIGEN_BUILD_DOC_DEFAULT ON) +endif() +option(EIGEN_BUILD_DOC "Enable creation of Eigen documentation" ${EIGEN_BUILD_DOC_DEFAULT}) + +option(EIGEN_BUILD_DEMOS "Toggles the building of the Eigen demos" ${PROJECT_IS_TOP_LEVEL}) + +# Disable pkgconfig only for native Windows builds +if(NOT WIN32 OR NOT CMAKE_HOST_SYSTEM_NAME MATCHES Windows) + option(EIGEN_BUILD_PKGCONFIG "Build pkg-config .pc file for Eigen" ${PROJECT_IS_TOP_LEVEL}) +endif() +option(EIGEN_BUILD_CMAKE_PACKAGE "Enables the creation of EigenConfig.cmake and related files" ${PROJECT_IS_TOP_LEVEL}) + +if (EIGEN_BUILD_TESTING OR EIGEN_BUILD_BLAS OR EIGEN_BUILD_LAPACK OR EIGEN_BUILT_BTL OR EIGEN_BUILD_BTL OR EIGEN_BUILD_SPBENCH OR EIGEN_BUILD_DOC OR EIGEN_BUILD_DEMOS) + set(EIGEN_IS_BUILDING_ ON) +endif() + +#============================================================================== +# Version Info. +#============================================================================== + +# If version information is not provided, automatically parse the version number +# from header files. 
+file(READ "${PROJECT_SOURCE_DIR}/Eigen/Version" _eigen_version_header) +if (NOT DEFINED EIGEN_WORLD_VERSION) + string(REGEX MATCH "define[ \t]+EIGEN_WORLD_VERSION[ \t]+([0-9]+)" _eigen_world_version_match "${_eigen_version_header}") + set(EIGEN_WORLD_VERSION "${CMAKE_MATCH_1}" CACHE STRING "") +endif() +if (NOT DEFINED EIGEN_MAJOR_VERSION) + string(REGEX MATCH "define[ \t]+EIGEN_MAJOR_VERSION[ \t]+([0-9]+)" _eigen_major_version_match "${_eigen_version_header}") + set(EIGEN_MAJOR_VERSION "${CMAKE_MATCH_1}" CACHE STRING "") +endif() +if (NOT DEFINED EIGEN_MINOR_VERSION) + string(REGEX MATCH "define[ \t]+EIGEN_MINOR_VERSION[ \t]+([0-9]+)" _eigen_minor_version_match "${_eigen_version_header}") + set(EIGEN_MINOR_VERSION "${CMAKE_MATCH_1}" CACHE STRING "") +endif() +if (NOT DEFINED EIGEN_PATCH_VERSION) + string(REGEX MATCH "define[ \t]+EIGEN_PATCH_VERSION[ \t]+([0-9]+)" _eigen_patch_version_match "${_eigen_version_header}") + set(EIGEN_PATCH_VERSION "${CMAKE_MATCH_1}" CACHE STRING "") +endif() +if (NOT DEFINED EIGEN_PRERELEASE_VERSION) + set(EIGEN_PRERELEASE_VERSION "dev") +endif() + +# If we are in a git repo, extract a changeset. +if(IS_DIRECTORY ${CMAKE_SOURCE_DIR}/.git) + # If the git program is absent, this will leave the EIGEN_GIT_REVNUM string empty, + # but won't stop CMake. + execute_process(COMMAND git ls-remote -q ${CMAKE_SOURCE_DIR} HEAD OUTPUT_VARIABLE EIGEN_GIT_OUTPUT) +endif() + +# extract the git rev number from the git output... +if(EIGEN_GIT_OUTPUT) + string(REGEX MATCH "^([0-9;a-f]+).*" EIGEN_GIT_CHANGESET_MATCH "${EIGEN_GIT_OUTPUT}") + set(EIGEN_GIT_REVNUM "${CMAKE_MATCH_1}") +endif() + +if (NOT DEFINED EIGEN_BUILD_VERSION AND DEFINED EIGEN_GIT_REVNUM) + string(SUBSTRING "${EIGEN_GIT_REVNUM}" 0 8 EIGEN_BUILD_VERSION) +else() + set(EIGEN_BUILD_VERSION "" CACHE STRING "") +endif() + +# The EIGEN_VERSION_NUMBER must be of the form <major.minor.patch>. +# The EIGEN_VERSION_STRING can contain the prerelease/build strings. 
+set(EIGEN_VERSION_NUMBER "${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION}.${EIGEN_PATCH_VERSION}" CACHE STRING "") +set(EIGEN_VERSION_STRING "${EIGEN_VERSION_NUMBER}" CACHE STRING "") +if (NOT "x${EIGEN_PRERELEASE_VERSION}" STREQUAL "x") + set(EIGEN_VERSION_STRING "${EIGEN_VERSION_STRING}-${EIGEN_PRERELEASE_VERSION}" CACHE STRING "") +endif() +if (NOT "x${EIGEN_BUILD_VERSION}" STREQUAL "x") + set(EIGEN_VERSION_STRING "${EIGEN_VERSION_STRING}+${EIGEN_BUILD_VERSION}" CACHE STRING "") +endif() + + +# Generate version file. +configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/Version.in" + "${CMAKE_CURRENT_BINARY_DIR}/include/Eigen/Version") + +#============================================================================== +# Install Path Configuration. +#============================================================================== + +# Unconditionally allow install of targets to support nested dependency +# installations. +# +# Note: projects that depend on Eigen should _probably_ exclude installing +# Eigen by default (e.g. by using EXCLUDE_FROM_ALL when using +# FetchContent_Declare or add_subdirectory) to avoid overwriting a previous +# installation. + +include(GNUInstallDirs) +# Backward compatibility support for EIGEN_INCLUDE_INSTALL_DIR +if(EIGEN_INCLUDE_INSTALL_DIR) + message(WARNING "EIGEN_INCLUDE_INSTALL_DIR is deprecated. 
Use INCLUDE_INSTALL_DIR instead.") +endif() + +if(EIGEN_INCLUDE_INSTALL_DIR AND NOT INCLUDE_INSTALL_DIR) + set(INCLUDE_INSTALL_DIR ${EIGEN_INCLUDE_INSTALL_DIR} + CACHE PATH "The directory relative to CMAKE_INSTALL_PREFIX where Eigen header files are installed") +else() + set(INCLUDE_INSTALL_DIR + "${CMAKE_INSTALL_INCLUDEDIR}/eigen3" + CACHE PATH "The directory relative to CMAKE_INSTALL_PREFIX where Eigen header files are installed" + ) +endif() +set(CMAKEPACKAGE_INSTALL_DIR + "${CMAKE_INSTALL_DATADIR}/eigen3/cmake" + CACHE PATH "The directory relative to CMAKE_INSTALL_PREFIX where Eigen3Config.cmake is installed" + ) +set(PKGCONFIG_INSTALL_DIR + "${CMAKE_INSTALL_DATADIR}/pkgconfig" + CACHE PATH "The directory relative to CMAKE_INSTALL_PREFIX where eigen3.pc is installed" + ) + +foreach(var INCLUDE_INSTALL_DIR CMAKEPACKAGE_INSTALL_DIR PKGCONFIG_INSTALL_DIR) + # If an absolute path is specified, make it relative to "{CMAKE_INSTALL_PREFIX}". + if(IS_ABSOLUTE "${${var}}") + file(RELATIVE_PATH "${var}" "${CMAKE_INSTALL_PREFIX}" "${${var}}") + endif() +endforeach() + +#============================================================================== +# Eigen Library. +#============================================================================== + +# Alias Eigen_*_DIR to Eigen3_*_DIR: +set(Eigen_SOURCE_DIR ${Eigen3_SOURCE_DIR}) +set(Eigen_BINARY_DIR ${Eigen3_BINARY_DIR}) + +# Imported target support +add_library (eigen INTERFACE) +add_library (Eigen3::Eigen ALIAS eigen) +# In-tree consumers (add_subdirectory / build-tree export) include straight from +# the source checkout; consumers of an installed package use INCLUDE_INSTALL_DIR. +target_include_directories (eigen INTERFACE + $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}> + $<INSTALL_INTERFACE:${INCLUDE_INSTALL_DIR}> +) + +# Eigen requires at least C++14 +target_compile_features (eigen INTERFACE cxx_std_14) + +# Export as title case Eigen +set_target_properties (eigen PROPERTIES EXPORT_NAME Eigen) + +#============================================================================== +# Install Rule Configuration. 
+#============================================================================== + +install(FILES + signature_of_eigen3_matrix_library + DESTINATION ${INCLUDE_INSTALL_DIR} COMPONENT Devel + ) + +if(EIGEN_BUILD_PKGCONFIG) + configure_file(eigen3.pc.in eigen3.pc @ONLY) + install(FILES ${CMAKE_CURRENT_BINARY_DIR}/eigen3.pc + DESTINATION ${PKGCONFIG_INSTALL_DIR}) +endif() + +install(DIRECTORY Eigen DESTINATION ${INCLUDE_INSTALL_DIR} COMPONENT Devel) +# Replace the "Version" header file with the generated one. +install(FILES ${CMAKE_CURRENT_BINARY_DIR}/include/Eigen/Version + DESTINATION ${INCLUDE_INSTALL_DIR}/Eigen/ COMPONENT Devel) + +install(TARGETS eigen EXPORT Eigen3Targets) + +if(EIGEN_BUILD_CMAKE_PACKAGE) + include (CMakePackageConfigHelpers) + configure_package_config_file ( + ${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3Config.cmake.in + ${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake + INSTALL_DESTINATION ${CMAKEPACKAGE_INSTALL_DIR} + NO_SET_AND_CHECK_MACRO # Eigen does not provide legacy style defines + NO_CHECK_REQUIRED_COMPONENTS_MACRO # Eigen does not provide components + ) + + set(CVF_VERSION "${EIGEN_VERSION_NUMBER}") + configure_file("${CMAKE_CURRENT_SOURCE_DIR}/cmake/Eigen3ConfigVersion.cmake.in" + "Eigen3ConfigVersion.cmake" + @ONLY) + + # The Eigen target will be located in the Eigen3 namespace. Other CMake + # targets can refer to it using Eigen3::Eigen. + export (TARGETS eigen NAMESPACE Eigen3:: FILE Eigen3Targets.cmake) + # Export Eigen3 package to CMake registry such that it can be easily found by + # CMake even if it has not been installed to a standard directory. 
+ export (PACKAGE Eigen3) + + install (EXPORT Eigen3Targets NAMESPACE Eigen3:: DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}) + + install (FILES ${CMAKE_CURRENT_BINARY_DIR}/Eigen3Config.cmake + ${CMAKE_CURRENT_BINARY_DIR}/Eigen3ConfigVersion.cmake + DESTINATION ${CMAKEPACKAGE_INSTALL_DIR}) + + # Add uninstall target + if(NOT TARGET uninstall AND PROJECT_IS_TOP_LEVEL) + add_custom_target ( uninstall + COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/EigenUninstall.cmake) + endif() +endif() + +#============================================================================== +# General Build Configuration. +#============================================================================== + +# Avoid setting the standard in a parent if unset. +if(PROJECT_IS_TOP_LEVEL) + set(CMAKE_CXX_STANDARD 14 CACHE STRING "Default C++ standard") + set(CMAKE_CXX_STANDARD_REQUIRED ON CACHE BOOL "Require C++ standard") + set(CMAKE_CXX_EXTENSIONS OFF CACHE BOOL "Allow C++ extensions") +endif() + +# Guard against in-source builds +if(${CMAKE_SOURCE_DIR} STREQUAL ${CMAKE_BINARY_DIR}) + message(FATAL_ERROR "In-source builds not allowed. Please make a new directory (called a build directory) and run CMake from there. You may need to remove CMakeCache.txt. ") +endif() + +# Guard against bad build-type strings +if (PROJECT_IS_TOP_LEVEL AND NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release") +endif() + +# Only try to figure out how to link the math library if we are building something. +# Otherwise, let the parent project deal with dependencies. +if (EIGEN_IS_BUILDING_) + # Use Eigen's cmake files. 
+ set(CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake) + + set(CMAKE_INCLUDE_CURRENT_DIR OFF) + + find_package(StandardMathLibrary) + find_package(AOCL QUIET) + set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "") + if(AOCL_FOUND) + list(APPEND EIGEN_STANDARD_LIBRARIES_TO_LINK_TO ${AOCL_LIBRARIES}) + if(AOCL_INCLUDE_DIRS) + include_directories(${AOCL_INCLUDE_DIRS}) + endif() + endif() + + if(NOT STANDARD_MATH_LIBRARY_FOUND) + message(FATAL_ERROR + "Can't link to the standard math library. Please report to the Eigen developers, telling them about your platform.") + else() + if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) + set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO} ${STANDARD_MATH_LIBRARY}") + else() + set(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO "${STANDARD_MATH_LIBRARY}") + endif() + # Clean up any leading/trailing whitespace in the variable to avoid CMP0004 errors + string(STRIP "${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}" EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) + endif() + + + if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) + message(STATUS "Standard libraries to link to explicitly: ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}") + else() + message(STATUS "Standard libraries to link to explicitly: none") + endif() + + # Default tests/examples/libraries to row-major. + option(EIGEN_DEFAULT_TO_ROW_MAJOR "Use row-major as default matrix storage order" OFF) + if(EIGEN_DEFAULT_TO_ROW_MAJOR) + add_definitions("-DEIGEN_DEFAULT_TO_ROW_MAJOR") + endif() +endif() + +#============================================================================== +# Test Configuration. +#============================================================================== + +if (EIGEN_BUILD_TESTING) + function(ei_maybe_separate_arguments variable mode args) + # Use separate_arguments if the input is a single string containing a space. + # Otherwise, if it is already a list or doesn't have a space, just propagate + # the original value. This is to better support multi-argument lists. 
+ list(LENGTH args list_length) + if (${list_length} EQUAL 1) + string(FIND "${args}" " " has_space) + if (${has_space} GREATER -1) + separate_arguments(args ${mode} "${args}") + endif() + endif() + set(${variable} ${args} PARENT_SCOPE) + endfunction(ei_maybe_separate_arguments) + + include(CheckCXXCompilerFlag) + macro(ei_add_cxx_compiler_flag FLAG) + string(REGEX REPLACE "-" "" SFLAG1 ${FLAG}) + string(REGEX REPLACE "\\+" "p" SFLAG ${SFLAG1}) + check_cxx_compiler_flag(${FLAG} COMPILER_SUPPORT_${SFLAG}) + if(COMPILER_SUPPORT_${SFLAG}) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${FLAG}") + endif() + endmacro() + + set(EIGEN_TEST_CUSTOM_LINKER_FLAGS "" CACHE STRING "Additional linker flags when linking unit tests.") + set(EIGEN_TEST_CUSTOM_CXX_FLAGS "" CACHE STRING "Additional compiler flags when compiling unit tests.") + # Convert space-separated arguments into CMake lists for downstream consumption. + ei_maybe_separate_arguments(EIGEN_TEST_CUSTOM_LINKER_FLAGS NATIVE_COMMAND "${EIGEN_TEST_CUSTOM_LINKER_FLAGS}") + ei_maybe_separate_arguments(EIGEN_TEST_CUSTOM_CXX_FLAGS NATIVE_COMMAND "${EIGEN_TEST_CUSTOM_CXX_FLAGS}") + + option(EIGEN_SPLIT_LARGE_TESTS "Split large tests into smaller executables" ON) + set(EIGEN_TEST_MAX_SIZE "320" CACHE STRING "Maximal matrix/vector size, default is 320") + + # Flags for tests. 
+ if(NOT MSVC) + # We assume that other compilers are partly compatible with GNUCC + + # clang outputs some warnings for unknown flags that are not caught by check_cxx_compiler_flag + # adding -Werror turns such warnings into errors + check_cxx_compiler_flag("-Werror" COMPILER_SUPPORT_WERROR) + if(COMPILER_SUPPORT_WERROR) + set(CMAKE_REQUIRED_FLAGS "-Werror") + endif() + ei_add_cxx_compiler_flag("-pedantic") + ei_add_cxx_compiler_flag("-Wall") + ei_add_cxx_compiler_flag("-Wextra") + # ei_add_cxx_compiler_flag("-Weverything") # clang + ei_add_cxx_compiler_flag("-Wundef") + ei_add_cxx_compiler_flag("-Wcast-align") + ei_add_cxx_compiler_flag("-Wchar-subscripts") + ei_add_cxx_compiler_flag("-Wnon-virtual-dtor") + ei_add_cxx_compiler_flag("-Wunused-local-typedefs") + ei_add_cxx_compiler_flag("-Wpointer-arith") + ei_add_cxx_compiler_flag("-Wwrite-strings") + ei_add_cxx_compiler_flag("-Wformat-security") + ei_add_cxx_compiler_flag("-Wshorten-64-to-32") + ei_add_cxx_compiler_flag("-Wlogical-op") + ei_add_cxx_compiler_flag("-Wenum-conversion") + ei_add_cxx_compiler_flag("-Wc++11-extensions") + ei_add_cxx_compiler_flag("-Wdouble-promotion") + # ei_add_cxx_compiler_flag("-Wconversion") + ei_add_cxx_compiler_flag("-Wshadow") + ei_add_cxx_compiler_flag("-Wno-psabi") + ei_add_cxx_compiler_flag("-Wno-variadic-macros") + ei_add_cxx_compiler_flag("-Wno-long-long") + ei_add_cxx_compiler_flag("-fno-common") + ei_add_cxx_compiler_flag("-fstrict-aliasing") + ei_add_cxx_compiler_flag("-wd981") # disable ICC's "operands are evaluated in unspecified order" remark + ei_add_cxx_compiler_flag("-wd2304") # disable ICC's "warning #2304: non-explicit constructor with single argument may cause implicit type conversion" produced by -Wnon-virtual-dtor + + # Clang emits warnings about unused flag. 
+ if (NOT CMAKE_CXX_COMPILER_ID MATCHES "Clang") + ei_add_cxx_compiler_flag("-fno-check-new") + endif() + + + if(ANDROID_NDK) + ei_add_cxx_compiler_flag("-pie") + ei_add_cxx_compiler_flag("-fPIE") + endif() + + set(CMAKE_REQUIRED_FLAGS "") + + option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF) + if(EIGEN_TEST_SSE2) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse2") + message(STATUS "Enabling SSE2 in tests/examples") + endif() + + option(EIGEN_TEST_SSE3 "Enable/Disable SSE3 in tests/examples" OFF) + if(EIGEN_TEST_SSE3) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse3") + message(STATUS "Enabling SSE3 in tests/examples") + endif() + + option(EIGEN_TEST_SSSE3 "Enable/Disable SSSE3 in tests/examples" OFF) + if(EIGEN_TEST_SSSE3) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mssse3") + message(STATUS "Enabling SSSE3 in tests/examples") + endif() + + option(EIGEN_TEST_SSE4_1 "Enable/Disable SSE4.1 in tests/examples" OFF) + if(EIGEN_TEST_SSE4_1) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.1") + message(STATUS "Enabling SSE4.1 in tests/examples") + endif() + + option(EIGEN_TEST_SSE4_2 "Enable/Disable SSE4.2 in tests/examples" OFF) + if(EIGEN_TEST_SSE4_2) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -msse4.2") + message(STATUS "Enabling SSE4.2 in tests/examples") + endif() + + option(EIGEN_TEST_AVX "Enable/Disable AVX in tests/examples" OFF) + if(EIGEN_TEST_AVX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx") + message(STATUS "Enabling AVX in tests/examples") + endif() + + option(EIGEN_TEST_FMA "Enable/Disable FMA in tests/examples" OFF) + if(EIGEN_TEST_FMA AND NOT EIGEN_TEST_NEON) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfma") + message(STATUS "Enabling FMA in tests/examples") + endif() + + option(EIGEN_TEST_AVX2 "Enable/Disable AVX2 in tests/examples" OFF) + if(EIGEN_TEST_AVX2) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx2 -mfma") + message(STATUS "Enabling AVX2 in tests/examples") + endif() + + option(EIGEN_TEST_AVX512 "Enable/Disable AVX512 in 
tests/examples" OFF) + if(EIGEN_TEST_AVX512) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mfma") + message(STATUS "Enabling AVX512 in tests/examples") + endif() + + option(EIGEN_TEST_AVX512DQ "Enable/Disable AVX512DQ in tests/examples" OFF) + if(EIGEN_TEST_AVX512DQ) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512dq -mfma") + message(STATUS "Enabling AVX512DQ in tests/examples") + endif() + + option(EIGEN_TEST_AVX512FP16 "Enable/Disable AVX512-FP16 in tests/examples" OFF) + if(EIGEN_TEST_AVX512FP16) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx512f -mfma -mavx512vl -mavx512fp16") + message(STATUS "Enabling AVX512-FP16 in tests/examples") + endif() + + option(EIGEN_TEST_F16C "Enable/Disable F16C in tests/examples" OFF) + if(EIGEN_TEST_F16C) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mf16c") + message(STATUS "Enabling F16C in tests/examples") + endif() + + option(EIGEN_TEST_ALTIVEC "Enable/Disable AltiVec in tests/examples" OFF) + if(EIGEN_TEST_ALTIVEC) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maltivec -mabi=altivec") + message(STATUS "Enabling AltiVec in tests/examples") + endif() + + option(EIGEN_TEST_VSX "Enable/Disable VSX in tests/examples" OFF) + if(EIGEN_TEST_VSX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64 -mvsx") + message(STATUS "Enabling VSX in tests/examples") + endif() + + option(EIGEN_TEST_MSA "Enable/Disable MSA in tests/examples" OFF) + if(EIGEN_TEST_MSA) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mmsa") + message(STATUS "Enabling MSA in tests/examples") + endif() + + option(EIGEN_TEST_LSX "Enable/Disable LSX in tests/examples" OFF) + if(EIGEN_TEST_LSX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mlsx") + message(STATUS "Enabling LSX in tests/examples") + endif() + + option(EIGEN_TEST_NEON "Enable/Disable Neon in tests/examples" OFF) + if(EIGEN_TEST_NEON) + if(EIGEN_TEST_FMA) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon-vfpv4") + else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon") + endif() + set(CMAKE_CXX_FLAGS 
"${CMAKE_CXX_FLAGS} -mfloat-abi=hard") + message(STATUS "Enabling NEON in tests/examples") + endif() + + option(EIGEN_TEST_NEON64 "Enable/Disable Neon in tests/examples" OFF) + if(EIGEN_TEST_NEON64) + # No extra compiler flags are appended for NEON64; the assignment below leaves CMAKE_CXX_FLAGS unchanged. + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + message(STATUS "Enabling NEON in tests/examples") + endif() + + option(EIGEN_TEST_Z13 "Enable/Disable S390X(zEC13) ZVECTOR in tests/examples" OFF) + if(EIGEN_TEST_Z13) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=z13 -mzvector") + message(STATUS "Enabling S390X(zEC13) ZVECTOR in tests/examples") + endif() + + option(EIGEN_TEST_Z14 "Enable/Disable S390X(zEC14) ZVECTOR in tests/examples" OFF) + if(EIGEN_TEST_Z14) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=z14 -mzvector") + message(STATUS "Enabling S390X(zEC14) ZVECTOR in tests/examples") + endif() + + check_cxx_compiler_flag("-fopenmp" COMPILER_SUPPORT_OPENMP) + if(COMPILER_SUPPORT_OPENMP) + option(EIGEN_TEST_OPENMP "Enable/Disable OpenMP in tests/examples" OFF) + if(EIGEN_TEST_OPENMP) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp") + message(STATUS "Enabling OpenMP in tests/examples") + endif() + endif() + + else() + # C4127 - conditional expression is constant + # C4714 - marked as __forceinline not inlined (I failed to deactivate it selectively) + # We can disable this warning in the unit tests since it is clear that it occurs + # because we are oftentimes returning objects that have a destructor or may + # throw exceptions - in particular in the unit tests we are throwing extra many + # exceptions to cover indexing errors. 
+ # C4505 - unreferenced local function has been removed (impossible to deactivate selectively) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc /wd4127 /wd4505 /wd4714") + + # replace all /Wx by /W4 + string(REGEX REPLACE "/W[0-9]" "/W4" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + + check_cxx_compiler_flag("/openmp" COMPILER_SUPPORT_OPENMP) + if(COMPILER_SUPPORT_OPENMP) + option(EIGEN_TEST_OPENMP "Enable/Disable OpenMP in tests/examples" OFF) + if(EIGEN_TEST_OPENMP) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /openmp") + message(STATUS "Enabling OpenMP in tests/examples") + endif() + endif() + + option(EIGEN_TEST_SSE2 "Enable/Disable SSE2 in tests/examples" OFF) + if(EIGEN_TEST_SSE2) + if(NOT CMAKE_CL_64) + # arch is not supported on 64 bit systems, SSE is enabled automatically. + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:SSE2") + endif() + message(STATUS "Enabling SSE2 in tests/examples") + endif() + + option(EIGEN_TEST_AVX "Enable/Disable AVX in tests/examples" OFF) + if(EIGEN_TEST_AVX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX") + message(STATUS "Enabling AVX in tests/examples") + endif() + + option(EIGEN_TEST_FMA "Enable/Disable FMA/AVX2 in tests/examples" OFF) + option(EIGEN_TEST_AVX2 "Enable/Disable FMA/AVX2 in tests/examples" OFF) + if((EIGEN_TEST_FMA AND NOT EIGEN_TEST_NEON) OR EIGEN_TEST_AVX2) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX2") + message(STATUS "Enabling FMA/AVX2 in tests/examples") + endif() + + option(EIGEN_TEST_AVX512 "Enable/Disable AVX512 in tests/examples" OFF) + option(EIGEN_TEST_AVX512DQ "Enable/Disable AVX512DQ in tests/examples" OFF) + if(EIGEN_TEST_AVX512 OR EIGEN_TEST_AVX512DQ) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /arch:AVX512") + message(STATUS "Enabling AVX512 in tests/examples") + endif() + + endif(NOT MSVC) + + option(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION "Disable explicit vectorization in tests/examples" OFF) + option(EIGEN_TEST_X87 "Force using X87 instructions. Implies no vectorization." 
OFF) + option(EIGEN_TEST_32BIT "Force generating 32bit code." OFF) + + if(EIGEN_TEST_X87) + set(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION ON) + if(CMAKE_COMPILER_IS_GNUCXX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpmath=387") + message(STATUS "Forcing use of x87 instructions in tests/examples") + else() + message(STATUS "EIGEN_TEST_X87 ignored on your compiler") + endif() + endif() + + if(EIGEN_TEST_32BIT) + if(CMAKE_COMPILER_IS_GNUCXX) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m32") + message(STATUS "Forcing generation of 32-bit code in tests/examples") + else() + message(STATUS "EIGEN_TEST_32BIT ignored on your compiler") + endif() + endif() + + if(EIGEN_TEST_NO_EXPLICIT_VECTORIZATION) + add_definitions(-DEIGEN_DONT_VECTORIZE=1) + message(STATUS "Disabling vectorization in tests/examples") + endif() + + option(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT "Disable explicit alignment (hence vectorization) in tests/examples" OFF) + if(EIGEN_TEST_NO_EXPLICIT_ALIGNMENT) + add_definitions(-DEIGEN_DONT_ALIGN=1) + message(STATUS "Disabling alignment in tests/examples") + endif() + + option(EIGEN_TEST_NO_EXCEPTIONS "Disables C++ exceptions" OFF) + if(EIGEN_TEST_NO_EXCEPTIONS) + ei_add_cxx_compiler_flag("-fno-exceptions") + message(STATUS "Disabling exceptions in tests/examples") + endif() + + set(EIGEN_CUDA_CXX_FLAGS "" CACHE STRING "Additional flags to pass to the cuda compiler.") + set(EIGEN_CUDA_COMPUTE_ARCH 30 CACHE STRING "The CUDA compute architecture(s) to target when compiling CUDA code") + + option(EIGEN_TEST_SYCL "Add Sycl support." OFF) + if(EIGEN_TEST_SYCL) + option(EIGEN_SYCL_DPCPP "Use the DPCPP Sycl implementation (DPCPP is default SYCL-Compiler)." ON) + option(EIGEN_SYCL_TRISYCL "Use the triSYCL Sycl implementation." OFF) + option(EIGEN_SYCL_ComputeCpp "Use the ComputeCPP Sycl implementation." 
OFF) + + # Building options + # https://developer.codeplay.com/products/computecpp/ce/2.11.0/guides/eigen-overview/options-for-building-eigen-sycl + option(EIGEN_SYCL_USE_DEFAULT_SELECTOR "Use sycl default selector to select the preferred device." OFF) + option(EIGEN_SYCL_NO_LOCAL_MEM "Build for devices without dedicated shared memory." OFF) + option(EIGEN_SYCL_LOCAL_MEM "Allow the use of local memory (enabled by default)." ON) + # Work-group sizes are numbers, not switches: option() only declares boolean (ON/OFF) cache entries, + # so they are declared as STRING cache entries that keep the same default of 16. + set(EIGEN_SYCL_LOCAL_THREAD_DIM0 16 CACHE STRING "Set work group size for dimension 0.") + set(EIGEN_SYCL_LOCAL_THREAD_DIM1 16 CACHE STRING "Set work group size for dimension 1.") + option(EIGEN_SYCL_ASYNC_EXECUTION "Allow asynchronous execution (enabled by default)." ON) + option(EIGEN_SYCL_DISABLE_SKINNY "Disable optimization for tall/skinny matrices." OFF) + option(EIGEN_SYCL_DISABLE_DOUBLE_BUFFER "Disable double buffer." OFF) + option(EIGEN_SYCL_DISABLE_SCALAR "Disable scalar contraction." OFF) + option(EIGEN_SYCL_DISABLE_GEMV "Disable GEMV and create a single kernel to calculate contraction instead." 
OFF) + + set(EIGEN_SYCL ON) + set(CMAKE_CXX_STANDARD 17) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-declarations -Wno-shorten-64-to-32 -Wno-cast-align") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-deprecated-copy-with-user-provided-copy -Wno-unused-variable") + set (CMAKE_MODULE_PATH "${CMAKE_ROOT}/Modules" "cmake/Modules/" "${CMAKE_MODULE_PATH}") + find_package(Threads REQUIRED) + if(EIGEN_SYCL_TRISYCL) + message(STATUS "Using triSYCL") + include(FindTriSYCL) + elseif(EIGEN_SYCL_ComputeCpp) + message(STATUS "Using ComputeCPP SYCL") + include(FindComputeCpp) + set(COMPUTECPP_DRIVER_DEFAULT_VALUE OFF) + if (NOT MSVC) + set(COMPUTECPP_DRIVER_DEFAULT_VALUE ON) + endif() + option(COMPUTECPP_USE_COMPILER_DRIVER + "Use ComputeCpp driver instead of a 2 steps compilation" + ${COMPUTECPP_DRIVER_DEFAULT_VALUE} + ) + else() #Default SYCL compiler is DPCPP (EIGEN_SYCL_DPCPP) + set(DPCPP_SYCL_TARGET "spir64" CACHE STRING "Default target for Intel CPU/GPU") + message(STATUS "Using DPCPP") + find_package(DPCPP) + add_definitions(-DSYCL_COMPILER_IS_DPCPP) + endif(EIGEN_SYCL_TRISYCL) + if(EIGEN_DONT_VECTORIZE_SYCL) + message(STATUS "Disabling SYCL vectorization in tests/examples") + # When disabling SYCL vectorization, also disable Eigen default vectorization + add_definitions(-DEIGEN_DONT_VECTORIZE=1) + add_definitions(-DEIGEN_DONT_VECTORIZE_SYCL=1) + endif() + endif() + + include(EigenConfigureTesting) + + if(EIGEN_LEAVE_TEST_IN_ALL_TARGET) + # CTest automatic test building relies on the "all" target. + add_subdirectory(test) + add_subdirectory(failtest) + else() + add_subdirectory(test EXCLUDE_FROM_ALL) + add_subdirectory(failtest EXCLUDE_FROM_ALL) + endif() + + ei_testing_print_summary() + + if (EIGEN_SPLIT_TESTSUITE) + ei_split_testsuite("${EIGEN_SPLIT_TESTSUITE}") + endif() +endif(EIGEN_BUILD_TESTING) + +#============================================================================== +# Other Build Configurations. 
+#============================================================================== +add_subdirectory(unsupported) + +if(EIGEN_BUILD_BLAS) + add_subdirectory(blas) +endif() + +if (EIGEN_BUILD_LAPACK) + add_subdirectory(lapack) +endif() + +if(EIGEN_BUILD_DOC) + add_subdirectory(doc EXCLUDE_FROM_ALL) +endif() + +# TODO: consider also replacing EIGEN_BUILD_BTL by a custom target "make btl"? +if(EIGEN_BUILD_BTL) + add_subdirectory(bench/btl EXCLUDE_FROM_ALL) +endif() + +if(NOT WIN32 AND EIGEN_BUILD_SPBENCH) + add_subdirectory(bench/spbench EXCLUDE_FROM_ALL) +endif() +#--------------------------------------------------------------------------------------# +# AOCL BENCHMARK BUILD SECTION # +#--------------------------------------------------------------------------------------# +if(EIGEN_BUILD_AOCL_BENCH) + # Allow users to override the default architecture + set(EIGEN_AOCL_BENCH_ARCH "znver5" CACHE STRING "Target architecture for AOCL benchmark") + add_executable(benchmark_aocl EXCLUDE_FROM_ALL bench/benchmark_aocl.cpp) + include(CheckCXXCompilerFlag) + # check_cxx_compiler_flag() caches its result variable and skips the test when it is already set, + # so drop any previous result to re-test the flag after EIGEN_AOCL_BENCH_ARCH has been changed. + unset(COMPILER_SUPPORTS_AOCL_ARCH CACHE) + check_cxx_compiler_flag("-march=${EIGEN_AOCL_BENCH_ARCH}" COMPILER_SUPPORTS_AOCL_ARCH) + if(COMPILER_SUPPORTS_AOCL_ARCH) + target_compile_options(benchmark_aocl PRIVATE -O3 -Wno-shadow -march=${EIGEN_AOCL_BENCH_ARCH}) + else() + # Fall back to a plain optimized build without the architecture-specific flag. + message(WARNING "${EIGEN_AOCL_BENCH_ARCH} architecture not supported by compiler") + target_compile_options(benchmark_aocl PRIVATE -O3) + endif() + + # Add custom flags if provided + if(EIGEN_AOCL_BENCH_FLAGS) + separate_arguments(CUSTOM_FLAGS NATIVE_COMMAND "${EIGEN_AOCL_BENCH_FLAGS}") + target_compile_options(benchmark_aocl PRIVATE ${CUSTOM_FLAGS}) + # Check if OpenMP is requested in custom flags and link it + string(FIND "${EIGEN_AOCL_BENCH_FLAGS}" "-fopenmp" OPENMP_REQUESTED) + if(NOT OPENMP_REQUESTED EQUAL -1) + find_package(OpenMP) + if(OpenMP_CXX_FOUND) + target_link_libraries(benchmark_aocl OpenMP::OpenMP_CXX) + else() + # Generic fallback: let compiler handle OpenMP linking + if(MSVC) + 
target_compile_options(benchmark_aocl PRIVATE "/openmp") + else() + target_compile_options(benchmark_aocl PRIVATE "-fopenmp") + target_link_options(benchmark_aocl PRIVATE "-fopenmp") + endif() + message(STATUS "Using compiler OpenMP flags as fallback") + endif() + endif() + endif() + + target_include_directories(benchmark_aocl PRIVATE ${INCLUDE_INSTALL_DIR}) + if(EIGEN_AOCL_BENCH_USE_MT) + target_compile_definitions(benchmark_aocl PRIVATE EIGEN_USE_AOCL_MT) + else() + target_compile_definitions(benchmark_aocl PRIVATE EIGEN_USE_AOCL_ALL) + endif() + target_link_libraries(benchmark_aocl Eigen3::Eigen) + if(EIGEN_STANDARD_LIBRARIES_TO_LINK_TO) + target_link_libraries(benchmark_aocl ${EIGEN_STANDARD_LIBRARIES_TO_LINK_TO}) + endif() +endif() +#----------------------------------------------------------------------------------------# + +if (EIGEN_BUILD_DEMOS) + add_subdirectory(demos EXCLUDE_FROM_ALL) +endif() + +if (PROJECT_IS_TOP_LEVEL) + # must be after test and unsupported, for configuring buildtests.in + add_subdirectory(scripts EXCLUDE_FROM_ALL) + configure_file(scripts/cdashtesting.cmake.in cdashtesting.cmake @ONLY) +endif() + +#============================================================================== +# Summary. +#============================================================================== + +if(PROJECT_IS_TOP_LEVEL) + string(TOLOWER "${CMAKE_GENERATOR}" cmake_generator_tolower) + if(cmake_generator_tolower MATCHES "makefile") + message(STATUS "Available targets (use: make TARGET):") + else() + message(STATUS "Available targets (use: cmake --build . --target TARGET):") + endif() + message(STATUS "------------+--------------------------------------------------------------") + message(STATUS "Target | Description") + message(STATUS "------------+--------------------------------------------------------------") + message(STATUS "install | Install Eigen. 
Headers will be installed to:") + # Path template for the header location; the two values it is built from are printed right below. + message(STATUS " | CMAKE_INSTALL_PREFIX/INCLUDE_INSTALL_DIR") + message(STATUS " | Using the following values:") + message(STATUS " | CMAKE_INSTALL_PREFIX: ${CMAKE_INSTALL_PREFIX}") + message(STATUS " | INCLUDE_INSTALL_DIR: ${INCLUDE_INSTALL_DIR}") + message(STATUS " | Change the install location of Eigen headers using:") + message(STATUS " | cmake . -DCMAKE_INSTALL_PREFIX=yourprefix") + message(STATUS " | Or:") + message(STATUS " | cmake . -DINCLUDE_INSTALL_DIR=yourdir") + message(STATUS "uninstall | Remove files installed by the install target") + if (EIGEN_BUILD_DOC) + message(STATUS "doc | Generate the API documentation, requires Doxygen & LaTeX") + message(STATUS "install-doc | Install the API documentation") + endif() + if(EIGEN_BUILD_TESTING) + message(STATUS "check | Build and run the unit-tests. Read this page:") + message(STATUS " | http://eigen.tuxfamily.org/index.php?title=Tests") + endif() + if (EIGEN_BUILD_BLAS) + message(STATUS "blas | Build BLAS library (not the same thing as Eigen)") + endif() + if (EIGEN_BUILD_LAPACK) + message(STATUS "lapack | Build LAPACK subset library (not the same thing as Eigen)") + endif() + if(EIGEN_BUILD_AOCL_BENCH) + message(STATUS "benchmark_aocl | Build AOCL benchmark executable") + endif() + message(STATUS "------------+--------------------------------------------------------------") + message(STATUS "") +endif() + +message(STATUS "") +message(STATUS "Configured Eigen ${EIGEN_VERSION_STRING}") +message(STATUS "") + diff --git a/o-voxel/third_party/eigen/COPYING.APACHE b/o-voxel/third_party/eigen/COPYING.APACHE new file mode 100644 index 0000000000000000000000000000000000000000..4fba9c84e41741f029491d5aa256a79ec2569c86 --- /dev/null +++ b/o-voxel/third_party/eigen/COPYING.APACHE @@ -0,0 +1,203 @@ +/* + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +*/ \ No newline at end of file diff --git a/o-voxel/third_party/eigen/COPYING.BSD b/o-voxel/third_party/eigen/COPYING.BSD new file mode 100644 index 0000000000000000000000000000000000000000..5ca3c22fa0eabf60a35bc277a0bb971558c84983 --- /dev/null +++ b/o-voxel/third_party/eigen/COPYING.BSD @@ -0,0 +1,26 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ diff --git a/o-voxel/third_party/eigen/COPYING.MINPACK b/o-voxel/third_party/eigen/COPYING.MINPACK new file mode 100644 index 0000000000000000000000000000000000000000..4ab5916f7061b11866c4e228d6e151b5fdd76803 --- /dev/null +++ b/o-voxel/third_party/eigen/COPYING.MINPACK @@ -0,0 +1,51 @@ +Minpack Copyright Notice (1999) University of Chicago. All rights reserved + +Redistribution and use in source and binary forms, with or +without modification, are permitted provided that the +following conditions are met: + +1. Redistributions of source code must retain the above +copyright notice, this list of conditions and the following +disclaimer. + +2. 
Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following +disclaimer in the documentation and/or other materials +provided with the distribution. + +3. The end-user documentation included with the +redistribution, if any, must include the following +acknowledgment: + + "This product includes software developed by the + University of Chicago, as Operator of Argonne National + Laboratory. + +Alternately, this acknowledgment may appear in the software +itself, if and wherever such third-party acknowledgments +normally appear. + +4. WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" +WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE +UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND +THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE +OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY +OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR +USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF +THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) +DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION +UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL +BE CORRECTED. + +5. LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT +HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF +ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, +INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF +ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF +PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER +SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT +(INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, +EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE +POSSIBILITY OF SUCH LOSS OR DAMAGES. 
diff --git a/o-voxel/third_party/eigen/COPYING.MPL2 b/o-voxel/third_party/eigen/COPYING.MPL2 new file mode 100644 index 0000000000000000000000000000000000000000..43bbdb70921c6189ab2898505639490469f1373d --- /dev/null +++ b/o-voxel/third_party/eigen/COPYING.MPL2 @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. 
"Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. 
Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at https://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/o-voxel/third_party/eigen/COPYING.README b/o-voxel/third_party/eigen/COPYING.README new file mode 100644 index 0000000000000000000000000000000000000000..33efa29c9716a94f54dc7aad375bc8d64efe164b --- /dev/null +++ b/o-voxel/third_party/eigen/COPYING.README @@ -0,0 +1,6 @@ +Eigen is primarily MPL2 licensed. See COPYING.MPL2 and these links: + http://www.mozilla.org/MPL/2.0/ + http://www.mozilla.org/MPL/2.0/FAQ.html + +Some files contain third-party code under BSD or other MPL2-compatible licenses, +whence the other COPYING.* files here. \ No newline at end of file diff --git a/o-voxel/third_party/eigen/CTestConfig.cmake b/o-voxel/third_party/eigen/CTestConfig.cmake new file mode 100644 index 0000000000000000000000000000000000000000..f5081a9dac54296af41998b088acb05b2df0ed75 --- /dev/null +++ b/o-voxel/third_party/eigen/CTestConfig.cmake @@ -0,0 +1,17 @@ +## This file should be placed in the root directory of your project. +## Then modify the CMakeLists.txt file in the root directory of your +## project to incorporate the testing dashboard. 
+## # The following are required to use Dart and the Cdash dashboard +## enable_testing() +## include(CTest) +set(CTEST_PROJECT_NAME "Eigen") +set(CTEST_NIGHTLY_START_TIME "00:00:00 UTC") + +set(CTEST_DROP_METHOD "http") +set(CTEST_DROP_SITE "my.cdash.org") +set(CTEST_DROP_LOCATION "/submit.php?project=Eigen") +set(CTEST_DROP_SITE_CDASH TRUE) +#set(CTEST_PROJECT_SUBPROJECTS +#Official +#Unsupported +#) diff --git a/o-voxel/third_party/eigen/CTestCustom.cmake.in b/o-voxel/third_party/eigen/CTestCustom.cmake.in new file mode 100644 index 0000000000000000000000000000000000000000..cfe4817a78c60baf21f30b410c797a1ff5e1e94d --- /dev/null +++ b/o-voxel/third_party/eigen/CTestCustom.cmake.in @@ -0,0 +1,4 @@ + +set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_WARNINGS "2000") +set(CTEST_CUSTOM_MAXIMUM_NUMBER_OF_ERRORS "2000") +list(APPEND CTEST_CUSTOM_ERROR_EXCEPTION @EIGEN_CTEST_ERROR_EXCEPTION@) diff --git a/o-voxel/third_party/eigen/Eigen/AccelerateSupport b/o-voxel/third_party/eigen/Eigen/AccelerateSupport new file mode 100644 index 0000000000000000000000000000000000000000..b1ed4ea7751f776466fe66cd26e87c1eff23289d --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/AccelerateSupport @@ -0,0 +1,52 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ACCELERATESUPPORT_MODULE_H +#define EIGEN_ACCELERATESUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \ingroup Support_modules + * \defgroup AccelerateSupport_Module AccelerateSupport module + * + * This module provides an interface to the Apple Accelerate library. + * It provides the seven following main factorization classes: + * - class AccelerateLLT: a Cholesky (LL^T) factorization. 
+ * - class AccelerateLDLT: the default LDL^T factorization. + * - class AccelerateLDLTUnpivoted: a Cholesky-like LDL^T factorization with only 1x1 pivots and no pivoting + * - class AccelerateLDLTSBK: an LDL^T factorization with Supernode Bunch-Kaufman and static pivoting + * - class AccelerateLDLTTPP: an LDL^T factorization with full threshold partial pivoting + * - class AccelerateQR: a QR factorization + * - class AccelerateCholeskyAtA: a QR factorization without storing Q (equivalent to A^TA = R^T R) + * + * \code + * #include <Eigen/AccelerateSupport> + * \endcode + * + * In order to use this module, the Accelerate headers must be accessible from + * the include paths, and your binary must be linked to the Accelerate framework. + * The Accelerate library is only available on Apple hardware. + * + * Note that many of the algorithms can be influenced by the UpLo template + * argument. All matrices are assumed to be symmetric. For example, the following + * creates an LDLT factorization where your matrix is symmetric (implicit) and + * uses the lower triangle: + * + * \code + * AccelerateLDLT<SparseMatrix<float>, Lower> ldlt; + * \endcode + */ + +// IWYU pragma: begin_exports +#include "src/AccelerateSupport/AccelerateSupport.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_ACCELERATESUPPORT_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/Cholesky b/o-voxel/third_party/eigen/Eigen/Cholesky new file mode 100644 index 0000000000000000000000000000000000000000..d04981d82f08bb0962fa55cfe17fb43924b3ccc6 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/Cholesky @@ -0,0 +1,43 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CHOLESKY_MODULE_H +#define EIGEN_CHOLESKY_MODULE_H + +#include "Core" +#include "Jacobi" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup Cholesky_Module Cholesky module + * + * + * + * This module provides two variants of the Cholesky decomposition for selfadjoint (hermitian) matrices. + * Those decompositions are also accessible via the following methods: + * - MatrixBase::llt() + * - MatrixBase::ldlt() + * - SelfAdjointView::llt() + * - SelfAdjointView::ldlt() + * + * \code + * #include + * \endcode + */ + +// IWYU pragma: begin_exports +#include "src/Cholesky/LLT.h" +#include "src/Cholesky/LDLT.h" +#ifdef EIGEN_USE_LAPACKE +#include "src/misc/lapacke_helpers.h" +#include "src/Cholesky/LLT_LAPACKE.h" +#endif +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_CHOLESKY_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/CholmodSupport b/o-voxel/third_party/eigen/Eigen/CholmodSupport new file mode 100644 index 0000000000000000000000000000000000000000..8ea3564427bb0ba74ccd8f193d29b85dc10492d5 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/CholmodSupport @@ -0,0 +1,48 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CHOLMODSUPPORT_MODULE_H +#define EIGEN_CHOLMODSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include + +/** \ingroup Support_modules + * \defgroup CholmodSupport_Module CholmodSupport module + * + * This module provides an interface to the Cholmod library which is part of the suitesparse package. It provides the two following main factorization classes: + * - class CholmodSupernodalLLT: a supernodal LLT Cholesky factorization. 
+ * - class CholmodDecomposition: a general L(D)LT Cholesky factorization with automatic or explicit runtime selection of + * the underlying factorization method (supernodal or simplicial). + * + * For the sake of completeness, this module also proposes the following two classes: + * - class CholmodSimplicialLLT + * - class CholmodSimplicialLDLT + * Note that these classes do not bring any particular advantage compared to the built-in + * SimplicialLLT and SimplicialLDLT factorization classes. + * + * \code + * #include <Eigen/CholmodSupport> + * \endcode + * + * In order to use this module, the cholmod headers must be accessible from the include paths, and your binary must be + * linked to the cholmod library and its dependencies. The dependencies depend on how cholmod has been compiled. For a + * cmake based project, you can use our FindCholmod.cmake module to help you in this task. + * + */ + +// IWYU pragma: begin_exports +#include "src/CholmodSupport/CholmodSupport.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_CHOLMODSUPPORT_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/Core b/o-voxel/third_party/eigen/Eigen/Core new file mode 100644 index 0000000000000000000000000000000000000000..8f2d5ea94a802544231f7004dd469f1284933e31 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/Core @@ -0,0 +1,477 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2007-2011 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CORE_MODULE_H +#define EIGEN_CORE_MODULE_H + +// Eigen version information. +#include "Version" + +// first thing Eigen does: stop the compiler from reporting useless warnings. 
+#include "src/Core/util/DisableStupidWarnings.h" + +// then include this file where all our macros are defined. It's really important to do it first because +// it's where we do all the compiler/OS/arch detections and define most defaults. +#include "src/Core/util/Macros.h" + +// This detects SSE/AVX/NEON/etc. and configure alignment settings +#include "src/Core/util/ConfigureVectorization.h" + +// We need cuda_runtime.h/hip_runtime.h to ensure that +// the EIGEN_USING_STD macro works properly on the device side +#if defined(EIGEN_CUDACC) +#include +#elif defined(EIGEN_HIPCC) +#include +#endif + +#ifdef EIGEN_EXCEPTIONS +#include +#endif + +// Disable the ipa-cp-clone optimization flag with MinGW 6.x or older (enabled by default with -O3) +// See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details. +#if EIGEN_COMP_MINGW && EIGEN_GNUC_STRICT_LESS_THAN(6, 0, 0) +#pragma GCC optimize("-fno-ipa-cp-clone") +#endif + +// Prevent ICC from specializing std::complex operators that silently fail +// on device. This allows us to use our own device-compatible specializations +// instead. 
+#if EIGEN_COMP_ICC && defined(EIGEN_GPU_COMPILE_PHASE) && !defined(_OVERRIDE_COMPLEX_SPECIALIZATION_) +#define _OVERRIDE_COMPLEX_SPECIALIZATION_ 1 +#endif +#include + +// this include file manages BLAS and MKL related macros +// and inclusion of their respective header files +#include "src/Core/util/MKL_support.h" +#include "src/Core/util/AOCL_Support.h" // <- ADD THIS + + +#if defined(EIGEN_HAS_CUDA_FP16) || defined(EIGEN_HAS_HIP_FP16) +#define EIGEN_HAS_GPU_FP16 +#endif + +#if defined(EIGEN_HAS_CUDA_BF16) || defined(EIGEN_HAS_HIP_BF16) +#define EIGEN_HAS_GPU_BF16 +#endif + +#if (defined _OPENMP) && (!defined EIGEN_DONT_PARALLELIZE) +#define EIGEN_HAS_OPENMP +#endif + +#ifdef EIGEN_HAS_OPENMP +#include +#include +#endif + +// MSVC for windows mobile does not have the errno.h file +#if !(EIGEN_COMP_MSVC && EIGEN_OS_WINCE) && !EIGEN_COMP_ARM +#define EIGEN_HAS_ERRNO +#endif + +#ifdef EIGEN_HAS_ERRNO +#include +#endif +#include +#include +#include +#include +#ifndef EIGEN_NO_IO +#include +#include +#endif +#include +#include +#include +#include // for CHAR_BIT +// for min/max: +#include + +#include +#include +#include + +// for std::is_nothrow_move_assignable +#include + +// for std::this_thread::yield(). 
+#if !defined(EIGEN_USE_BLAS) && (defined(EIGEN_HAS_OPENMP) || defined(EIGEN_GEMM_THREADPOOL)) +#include +#endif + +// for __cpp_lib feature test macros +#if defined(__has_include) && __has_include() +#include +#endif + +// for std::bit_cast() +#if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L +#include +#endif + +// for outputting debug info +#ifdef EIGEN_DEBUG_ASSIGN +#include +#endif + +// required for __cpuid, needs to be included after cmath +// also required for _BitScanReverse on Windows on ARM +#if EIGEN_COMP_MSVC && (EIGEN_ARCH_i386_OR_x86_64 || EIGEN_ARCH_ARM64) && !EIGEN_OS_WINCE +#include +#endif + +#if defined(EIGEN_USE_SYCL) +#undef min +#undef max +#undef isnan +#undef isinf +#undef isfinite +#include +#include +#include +#include +#ifndef EIGEN_SYCL_LOCAL_THREAD_DIM0 +#define EIGEN_SYCL_LOCAL_THREAD_DIM0 16 +#endif +#ifndef EIGEN_SYCL_LOCAL_THREAD_DIM1 +#define EIGEN_SYCL_LOCAL_THREAD_DIM1 16 +#endif +#endif + +#if defined EIGEN2_SUPPORT_STAGE40_FULL_EIGEN3_STRICTNESS || defined EIGEN2_SUPPORT_STAGE30_FULL_EIGEN3_API || \ + defined EIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined EIGEN2_SUPPORT_STAGE10_FULL_EIGEN2_API || \ + defined EIGEN2_SUPPORT +// This will generate an error message: +#error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information +#endif + +namespace Eigen { + +// we use size_t frequently and we'll never remember to prepend it with std:: every time just to +// ensure QNX/QCC support +using std::size_t; +// gcc 4.6.0 wants std:: for ptrdiff_t +using std::ptrdiff_t; + +} // namespace Eigen + +/** \defgroup Core_Module Core module + * This is the main module of Eigen providing dense matrix and vector support + * (both fixed and dynamic size) with all the features corresponding to a BLAS library + * and much more... 
+ * + * \code + * #include + * \endcode + */ + +#ifdef EIGEN_USE_LAPACKE +#ifdef EIGEN_USE_MKL +#include "mkl_lapacke.h" +#else +#include "src/misc/lapacke.h" +#endif +#endif + +// IWYU pragma: begin_exports +#include "src/Core/util/Constants.h" +#include "src/Core/util/Meta.h" +#include "src/Core/util/Assert.h" +#include "src/Core/util/ForwardDeclarations.h" +#include "src/Core/util/StaticAssert.h" +#include "src/Core/util/XprHelper.h" +#include "src/Core/util/Memory.h" +#include "src/Core/util/IntegralConstant.h" +#include "src/Core/util/Serializer.h" +#include "src/Core/util/SymbolicIndex.h" +#include "src/Core/util/EmulateArray.h" +#include "src/Core/util/MoreMeta.h" + +#include "src/Core/NumTraits.h" +#include "src/Core/MathFunctions.h" +#include "src/Core/RandomImpl.h" +#include "src/Core/GenericPacketMath.h" +#include "src/Core/MathFunctionsImpl.h" +#include "src/Core/arch/Default/ConjHelper.h" +// Generic half float support +#include "src/Core/arch/Default/Half.h" +#include "src/Core/arch/Default/BFloat16.h" +#include "src/Core/arch/Default/GenericPacketMathFunctionsFwd.h" + +#if defined(EIGEN_VECTORIZE_GENERIC) && !defined(EIGEN_DONT_VECTORIZE) +#include "src/Core/arch/clang/PacketMath.h" +#include "src/Core/arch/clang/TypeCasting.h" +#include "src/Core/arch/clang/Complex.h" +#include "src/Core/arch/clang/Reductions.h" +#include "src/Core/arch/clang/MathFunctions.h" +#else +#if defined EIGEN_VECTORIZE_AVX512 +#include "src/Core/arch/SSE/PacketMath.h" +#include "src/Core/arch/SSE/Reductions.h" +#include "src/Core/arch/AVX/PacketMath.h" +#include "src/Core/arch/AVX/Reductions.h" +#include "src/Core/arch/AVX512/PacketMath.h" +#include "src/Core/arch/AVX512/Reductions.h" +#if defined EIGEN_VECTORIZE_AVX512FP16 +#include "src/Core/arch/AVX512/PacketMathFP16.h" +#endif +#include "src/Core/arch/SSE/TypeCasting.h" +#include "src/Core/arch/AVX/TypeCasting.h" +#include "src/Core/arch/AVX512/TypeCasting.h" +#if defined EIGEN_VECTORIZE_AVX512FP16 +#include 
"src/Core/arch/AVX512/TypeCastingFP16.h" +#endif +#include "src/Core/arch/SSE/Complex.h" +#include "src/Core/arch/AVX/Complex.h" +#include "src/Core/arch/AVX512/Complex.h" +#include "src/Core/arch/SSE/MathFunctions.h" +#include "src/Core/arch/AVX/MathFunctions.h" +#include "src/Core/arch/AVX512/MathFunctions.h" +#if defined EIGEN_VECTORIZE_AVX512FP16 +#include "src/Core/arch/AVX512/MathFunctionsFP16.h" +#endif +#include "src/Core/arch/AVX512/TrsmKernel.h" +#elif defined EIGEN_VECTORIZE_AVX +// Use AVX for floats and doubles, SSE for integers +#include "src/Core/arch/SSE/PacketMath.h" +#include "src/Core/arch/SSE/Reductions.h" +#include "src/Core/arch/SSE/TypeCasting.h" +#include "src/Core/arch/SSE/Complex.h" +#include "src/Core/arch/AVX/PacketMath.h" +#include "src/Core/arch/AVX/Reductions.h" +#include "src/Core/arch/AVX/TypeCasting.h" +#include "src/Core/arch/AVX/Complex.h" +#include "src/Core/arch/SSE/MathFunctions.h" +#include "src/Core/arch/AVX/MathFunctions.h" +#elif defined EIGEN_VECTORIZE_SSE +#include "src/Core/arch/SSE/PacketMath.h" +#include "src/Core/arch/SSE/Reductions.h" +#include "src/Core/arch/SSE/TypeCasting.h" +#include "src/Core/arch/SSE/MathFunctions.h" +#include "src/Core/arch/SSE/Complex.h" +#endif + +#if defined(EIGEN_VECTORIZE_ALTIVEC) || defined(EIGEN_VECTORIZE_VSX) +#include "src/Core/arch/AltiVec/PacketMath.h" +#include "src/Core/arch/AltiVec/TypeCasting.h" +#include "src/Core/arch/AltiVec/MathFunctions.h" +#include "src/Core/arch/AltiVec/Complex.h" +#elif defined EIGEN_VECTORIZE_NEON +#include "src/Core/arch/NEON/PacketMath.h" +#include "src/Core/arch/NEON/TypeCasting.h" +#include "src/Core/arch/NEON/MathFunctions.h" +#include "src/Core/arch/NEON/Complex.h" +#elif defined EIGEN_VECTORIZE_LSX +#include "src/Core/arch/LSX/PacketMath.h" +#include "src/Core/arch/LSX/TypeCasting.h" +#include "src/Core/arch/LSX/MathFunctions.h" +#include "src/Core/arch/LSX/Complex.h" +#elif defined EIGEN_VECTORIZE_SVE +#include "src/Core/arch/SVE/PacketMath.h" 
+#include "src/Core/arch/SVE/TypeCasting.h" +#include "src/Core/arch/SVE/MathFunctions.h" +#elif defined EIGEN_VECTORIZE_RVV10 +#include "src/Core/arch/RVV10/PacketMath.h" +#include "src/Core/arch/RVV10/PacketMath4.h" +#include "src/Core/arch/RVV10/PacketMath2.h" +#include "src/Core/arch/RVV10/TypeCasting.h" +#include "src/Core/arch/RVV10/MathFunctions.h" +#if defined EIGEN_VECTORIZE_RVV10FP16 +#include "src/Core/arch/RVV10/PacketMathFP16.h" +#endif +#elif defined EIGEN_VECTORIZE_ZVECTOR +#include "src/Core/arch/ZVector/PacketMath.h" +#include "src/Core/arch/ZVector/MathFunctions.h" +#include "src/Core/arch/ZVector/Complex.h" +#elif defined EIGEN_VECTORIZE_MSA +#include "src/Core/arch/MSA/PacketMath.h" +#include "src/Core/arch/MSA/MathFunctions.h" +#include "src/Core/arch/MSA/Complex.h" +#elif defined EIGEN_VECTORIZE_HVX +#include "src/Core/arch/HVX/PacketMath.h" +#endif + +#if defined EIGEN_VECTORIZE_GPU +#include "src/Core/arch/GPU/PacketMath.h" +#include "src/Core/arch/GPU/MathFunctions.h" +#include "src/Core/arch/GPU/TypeCasting.h" +#endif + +#if defined(EIGEN_USE_SYCL) +#include "src/Core/arch/SYCL/InteropHeaders.h" +#if !defined(EIGEN_DONT_VECTORIZE_SYCL) +#include "src/Core/arch/SYCL/PacketMath.h" +#include "src/Core/arch/SYCL/MathFunctions.h" +#include "src/Core/arch/SYCL/TypeCasting.h" +#endif +#endif + +#endif // #ifndef EIGEN_VECTORIZE_GENERIC + +#include "src/Core/arch/Default/Settings.h" +// This file provides generic implementations valid for scalar as well +#include "src/Core/arch/Default/GenericPacketMathFunctions.h" + +#include "src/Core/functors/TernaryFunctors.h" +#include "src/Core/functors/BinaryFunctors.h" +#include "src/Core/functors/UnaryFunctors.h" +#include "src/Core/functors/NullaryFunctors.h" +#include "src/Core/functors/StlFunctors.h" +#include "src/Core/functors/AssignmentFunctors.h" + +// Specialized functors for GPU. 
+#ifdef EIGEN_GPUCC +#include "src/Core/arch/GPU/Complex.h" +#endif + +// Specializations of vectorized activation functions for NEON. +#ifdef EIGEN_VECTORIZE_NEON +#include "src/Core/arch/NEON/UnaryFunctors.h" +#endif + +#include "src/Core/util/IndexedViewHelper.h" +#include "src/Core/util/ReshapedHelper.h" +#include "src/Core/ArithmeticSequence.h" +#ifndef EIGEN_NO_IO +#include "src/Core/IO.h" +#endif +#include "src/Core/DenseCoeffsBase.h" +#include "src/Core/DenseBase.h" +#include "src/Core/MatrixBase.h" +#include "src/Core/EigenBase.h" + +#include "src/Core/Product.h" +#include "src/Core/CoreEvaluators.h" +#include "src/Core/AssignEvaluator.h" +#include "src/Core/RealView.h" +#include "src/Core/Assign.h" + +#include "src/Core/ArrayBase.h" +#include "src/Core/util/BlasUtil.h" +#include "src/Core/DenseStorage.h" +#include "src/Core/NestByValue.h" + +// #include "src/Core/ForceAlignedAccess.h" + +#include "src/Core/ReturnByValue.h" +#include "src/Core/NoAlias.h" +#include "src/Core/PlainObjectBase.h" +#include "src/Core/Matrix.h" +#include "src/Core/Array.h" +#include "src/Core/Fill.h" +#include "src/Core/CwiseTernaryOp.h" +#include "src/Core/CwiseBinaryOp.h" +#include "src/Core/CwiseUnaryOp.h" +#include "src/Core/CwiseNullaryOp.h" +#include "src/Core/CwiseUnaryView.h" +#include "src/Core/SelfCwiseBinaryOp.h" +#include "src/Core/InnerProduct.h" +#include "src/Core/Dot.h" +#include "src/Core/StableNorm.h" +#include "src/Core/Stride.h" +#include "src/Core/MapBase.h" +#include "src/Core/Map.h" +#include "src/Core/Ref.h" +#include "src/Core/Block.h" +#include "src/Core/VectorBlock.h" +#include "src/Core/IndexedView.h" +#include "src/Core/Reshaped.h" +#include "src/Core/Transpose.h" +#include "src/Core/DiagonalMatrix.h" +#include "src/Core/Diagonal.h" +#include "src/Core/DiagonalProduct.h" +#include "src/Core/SkewSymmetricMatrix3.h" +#include "src/Core/Redux.h" +#include "src/Core/Visitor.h" +#include "src/Core/FindCoeff.h" +#include "src/Core/Fuzzy.h" +#include 
"src/Core/Swap.h" +#include "src/Core/CommaInitializer.h" +#include "src/Core/GeneralProduct.h" +#include "src/Core/Solve.h" +#include "src/Core/Inverse.h" +#include "src/Core/SolverBase.h" +#include "src/Core/PermutationMatrix.h" +#include "src/Core/Transpositions.h" +#include "src/Core/TriangularMatrix.h" +#include "src/Core/SelfAdjointView.h" +#include "src/Core/products/GeneralBlockPanelKernel.h" +#include "src/Core/DeviceWrapper.h" +#ifdef EIGEN_GEMM_THREADPOOL +#include "ThreadPool" +#endif +#include "src/Core/products/Parallelizer.h" +#include "src/Core/ProductEvaluators.h" +#include "src/Core/products/GeneralMatrixVector.h" +#include "src/Core/products/GeneralMatrixMatrix.h" +#include "src/Core/SolveTriangular.h" +#include "src/Core/products/GeneralMatrixMatrixTriangular.h" +#include "src/Core/products/SelfadjointMatrixVector.h" +#include "src/Core/products/SelfadjointMatrixMatrix.h" +#include "src/Core/products/SelfadjointProduct.h" +#include "src/Core/products/SelfadjointRank2Update.h" +#include "src/Core/products/TriangularMatrixVector.h" +#include "src/Core/products/TriangularMatrixMatrix.h" +#include "src/Core/products/TriangularSolverMatrix.h" +#include "src/Core/products/TriangularSolverVector.h" +#include "src/Core/BandMatrix.h" +#include "src/Core/CoreIterators.h" +#include "src/Core/ConditionEstimator.h" + +#if !defined(EIGEN_VECTORIZE_GENERIC) +#if defined(EIGEN_VECTORIZE_VSX) +#include "src/Core/arch/AltiVec/MatrixProduct.h" +#elif defined EIGEN_VECTORIZE_NEON +#include "src/Core/arch/NEON/GeneralBlockPanelKernel.h" +#elif defined EIGEN_VECTORIZE_LSX +#include "src/Core/arch/LSX/GeneralBlockPanelKernel.h" +#endif + +#if defined(EIGEN_VECTORIZE_AVX512) +#include "src/Core/arch/AVX512/GemmKernel.h" +#endif +#endif + +#include "src/Core/Select.h" +#include "src/Core/VectorwiseOp.h" +#include "src/Core/PartialReduxEvaluator.h" +#include "src/Core/Random.h" +#include "src/Core/Replicate.h" +#include "src/Core/Reverse.h" +#include 
"src/Core/ArrayWrapper.h" +#include "src/Core/StlIterators.h" + +#ifdef EIGEN_USE_BLAS +#include "src/Core/products/GeneralMatrixMatrix_BLAS.h" +#include "src/Core/products/GeneralMatrixVector_BLAS.h" +#include "src/Core/products/GeneralMatrixMatrixTriangular_BLAS.h" +#include "src/Core/products/SelfadjointMatrixMatrix_BLAS.h" +#include "src/Core/products/SelfadjointMatrixVector_BLAS.h" +#include "src/Core/products/TriangularMatrixMatrix_BLAS.h" +#include "src/Core/products/TriangularMatrixVector_BLAS.h" +#include "src/Core/products/TriangularSolverMatrix_BLAS.h" +#endif // EIGEN_USE_BLAS + +#ifdef EIGEN_USE_MKL_VML +#include "src/Core/Assign_MKL.h" +#endif + +#ifdef EIGEN_USE_AOCL_VML +#include "src/Core/Assign_AOCL.h" +#endif + +#include "src/Core/GlobalFunctions.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_CORE_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/Dense b/o-voxel/third_party/eigen/Eigen/Dense new file mode 100644 index 0000000000000000000000000000000000000000..daf70967f393df441a9ed3bcf16994348c107c02 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/Dense @@ -0,0 +1,7 @@ +#include "Core" +#include "LU" +#include "Cholesky" +#include "QR" +#include "SVD" +#include "Geometry" +#include "Eigenvalues" diff --git a/o-voxel/third_party/eigen/Eigen/Eigen b/o-voxel/third_party/eigen/Eigen/Eigen new file mode 100644 index 0000000000000000000000000000000000000000..4de3281bb4de2a90526d3adfdab80f651c2231b8 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/Eigen @@ -0,0 +1,2 @@ +#include "Dense" +#include "Sparse" diff --git a/o-voxel/third_party/eigen/Eigen/Eigenvalues b/o-voxel/third_party/eigen/Eigen/Eigenvalues new file mode 100644 index 0000000000000000000000000000000000000000..4bb36b29a7b7459dfac05a391ff6b4eb4e57d3e9 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/Eigenvalues @@ -0,0 +1,63 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_EIGENVALUES_MODULE_H +#define EIGEN_EIGENVALUES_MODULE_H + +#include "Core" + +#include "Cholesky" +#include "Jacobi" +#include "Householder" +#include "LU" +#include "Geometry" +#include "Sparse" // Needed by ComplexQZ. + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup Eigenvalues_Module Eigenvalues module + * + * + * + * This module mainly provides various eigenvalue solvers. + * This module also provides some MatrixBase methods, including: + * - MatrixBase::eigenvalues(), + * - MatrixBase::operatorNorm() + * + * \code + * #include + * \endcode + */ + +// IWYU pragma: begin_exports +#include "src/Eigenvalues/Tridiagonalization.h" +#include "src/Eigenvalues/RealSchur.h" +#include "src/Eigenvalues/EigenSolver.h" +#include "src/Eigenvalues/SelfAdjointEigenSolver.h" +#include "src/Eigenvalues/GeneralizedSelfAdjointEigenSolver.h" +#include "src/Eigenvalues/HessenbergDecomposition.h" +#include "src/Eigenvalues/ComplexSchur.h" +#include "src/Eigenvalues/ComplexEigenSolver.h" +#include "src/Eigenvalues/RealQZ.h" +#include "src/Eigenvalues/ComplexQZ.h" +#include "src/Eigenvalues/GeneralizedEigenSolver.h" +#include "src/Eigenvalues/MatrixBaseEigenvalues.h" +#ifdef EIGEN_USE_LAPACKE +#ifdef EIGEN_USE_MKL +#include "mkl_lapacke.h" +#else +#include "src/misc/lapacke.h" +#endif +#include "src/Eigenvalues/RealSchur_LAPACKE.h" +#include "src/Eigenvalues/ComplexSchur_LAPACKE.h" +#include "src/Eigenvalues/SelfAdjointEigenSolver_LAPACKE.h" +#endif +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_EIGENVALUES_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/Geometry b/o-voxel/third_party/eigen/Eigen/Geometry new file mode 100644 index 
0000000000000000000000000000000000000000..a57d254568a8a50e583b714fb82159403a1582dc --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/Geometry @@ -0,0 +1,62 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_GEOMETRY_MODULE_H +#define EIGEN_GEOMETRY_MODULE_H + +#include "Core" + +#include "SVD" +#include "LU" +#include + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup Geometry_Module Geometry module + * + * This module provides support for: + * - fixed-size homogeneous transformations + * - translation, scaling, 2D and 3D rotations + * - \link Quaternion quaternions \endlink + * - cross products (\ref MatrixBase::cross(), \ref MatrixBase::cross3()) + * - orthogonal vector generation (MatrixBase::unitOrthogonal) + * - some linear components: \link ParametrizedLine parametrized-lines \endlink and \link Hyperplane hyperplanes \endlink + * - \link AlignedBox axis aligned bounding boxes \endlink + * - \link umeyama() least-square transformation fitting \endlink + * \code + * #include + * \endcode + */ + +// IWYU pragma: begin_exports +#include "src/Geometry/OrthoMethods.h" +#include "src/Geometry/EulerAngles.h" +#include "src/Geometry/Homogeneous.h" +#include "src/Geometry/RotationBase.h" +#include "src/Geometry/Rotation2D.h" +#include "src/Geometry/Quaternion.h" +#include "src/Geometry/AngleAxis.h" +#include "src/Geometry/Transform.h" +#include "src/Geometry/Translation.h" +#include "src/Geometry/Scaling.h" +#include "src/Geometry/Hyperplane.h" +#include "src/Geometry/ParametrizedLine.h" +#include "src/Geometry/AlignedBox.h" +#include "src/Geometry/Umeyama.h" + +#ifndef EIGEN_VECTORIZE_GENERIC +// TODO(rmlarsen): Make these work with generic vectorization if possible. 
+// Use the SSE optimized version whenever possible. +#if (defined EIGEN_VECTORIZE_SSE) || (defined EIGEN_VECTORIZE_NEON) +#include "src/Geometry/arch/Geometry_SIMD.h" +#endif +#endif +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_GEOMETRY_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/Householder b/o-voxel/third_party/eigen/Eigen/Householder new file mode 100644 index 0000000000000000000000000000000000000000..54bcfd578fbb5ffc503137d18cfd59804b13a23c --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/Householder @@ -0,0 +1,31 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_HOUSEHOLDER_MODULE_H +#define EIGEN_HOUSEHOLDER_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup Householder_Module Householder module + * This module provides Householder transformations. + * + * \code + * #include + * \endcode + */ + +// IWYU pragma: begin_exports +#include "src/Householder/Householder.h" +#include "src/Householder/HouseholderSequence.h" +#include "src/Householder/BlockHouseholder.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_HOUSEHOLDER_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/IterativeLinearSolvers b/o-voxel/third_party/eigen/Eigen/IterativeLinearSolvers new file mode 100644 index 0000000000000000000000000000000000000000..ed16047f8ad967089fd906688a14b8fc8fd389d7 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/IterativeLinearSolvers @@ -0,0 +1,52 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 
2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ITERATIVELINEARSOLVERS_MODULE_H +#define EIGEN_ITERATIVELINEARSOLVERS_MODULE_H + +#include "SparseCore" +#include "OrderingMethods" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** + * \defgroup IterativeLinearSolvers_Module IterativeLinearSolvers module + * + * This module currently provides iterative methods to solve problems of the form \c A \c x = \c b, where \c A is a + square matrix, usually very large and sparse. + * Those solvers are accessible via the following classes: + * - ConjugateGradient for selfadjoint (hermitian) matrices, + * - LeastSquaresConjugateGradient for rectangular least-squares problems, + * - BiCGSTAB for general square matrices. + * + * These iterative solvers are associated with some preconditioners: + * - IdentityPreconditioner - not really useful + * - DiagonalPreconditioner - also called Jacobi preconditioner, works very well on diagonally dominant matrices. + * - IncompleteLUT - incomplete LU factorization with dual thresholding + * + * Such problems can also be solved using the direct sparse decomposition modules: SparseCholesky, CholmodSupport, + UmfPackSupport, SuperLUSupport, AccelerateSupport.
+ * + \code + #include + \endcode + */ + +// IWYU pragma: begin_exports +#include "src/IterativeLinearSolvers/SolveWithGuess.h" +#include "src/IterativeLinearSolvers/IterativeSolverBase.h" +#include "src/IterativeLinearSolvers/BasicPreconditioners.h" +#include "src/IterativeLinearSolvers/ConjugateGradient.h" +#include "src/IterativeLinearSolvers/LeastSquareConjugateGradient.h" +#include "src/IterativeLinearSolvers/BiCGSTAB.h" +#include "src/IterativeLinearSolvers/IncompleteLUT.h" +#include "src/IterativeLinearSolvers/IncompleteCholesky.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_ITERATIVELINEARSOLVERS_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/Jacobi b/o-voxel/third_party/eigen/Eigen/Jacobi new file mode 100644 index 0000000000000000000000000000000000000000..47b65a508812b2ba5cb56ce1937f7ec21e5d425e --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/Jacobi @@ -0,0 +1,33 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_JACOBI_MODULE_H +#define EIGEN_JACOBI_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup Jacobi_Module Jacobi module + * This module provides Jacobi and Givens rotations. + * + * \code + * #include + * \endcode + * + * In addition to listed classes, it defines the two following MatrixBase methods to apply a Jacobi or Givens rotation: + * - MatrixBase::applyOnTheLeft() + * - MatrixBase::applyOnTheRight(). 
+ */ + +// IWYU pragma: begin_exports +#include "src/Jacobi/Jacobi.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_JACOBI_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/KLUSupport b/o-voxel/third_party/eigen/Eigen/KLUSupport new file mode 100644 index 0000000000000000000000000000000000000000..a72907e5cef2a6e65cb50dc37dcc232e3b07dc9f --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/KLUSupport @@ -0,0 +1,43 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_KLUSUPPORT_MODULE_H +#define EIGEN_KLUSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { +#include +#include +} + +/** \ingroup Support_modules + * \defgroup KLUSupport_Module KLUSupport module + * + * This module provides an interface to the KLU library which is part of the suitesparse package. It provides the following factorization class: + * - class KLU: a sparse LU factorization, well-suited for circuit simulation. + * + * \code + * #include + * \endcode + * + * In order to use this module, the klu and btf headers must be accessible from the include paths, and your binary must + * be linked to the klu library and its dependencies. The dependencies depend on how umfpack has been compiled. For a + * cmake based project, you can use our FindKLU.cmake module to help you in this task. 
+ * + */ + +// IWYU pragma: begin_exports +#include "src/KLUSupport/KLUSupport.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_KLUSUPPORT_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/LU b/o-voxel/third_party/eigen/Eigen/LU new file mode 100644 index 0000000000000000000000000000000000000000..672d08507815c5cf9b29f9ba7d67aaf88c511407 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/LU @@ -0,0 +1,49 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_LU_MODULE_H +#define EIGEN_LU_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup LU_Module LU module + * This module includes %LU decomposition and related notions such as matrix inversion and determinant. + * This module defines the following MatrixBase methods: + * - MatrixBase::inverse() + * - MatrixBase::determinant() + * + * \code + * #include + * \endcode + */ + +#include "src/misc/Kernel.h" +#include "src/misc/Image.h" + +// IWYU pragma: begin_exports +#include "src/LU/FullPivLU.h" +#include "src/LU/PartialPivLU.h" +#ifdef EIGEN_USE_LAPACKE +#include "src/misc/lapacke_helpers.h" +#include "src/LU/PartialPivLU_LAPACKE.h" +#endif +#include "src/LU/Determinant.h" +#include "src/LU/InverseImpl.h" + +#ifndef EIGEN_VECTORIZE_GENERIC +// TODO(rmlarsen): Make these work with generic vectorization if possible. 
+#if defined EIGEN_VECTORIZE_SSE || defined EIGEN_VECTORIZE_NEON +#include "src/LU/arch/InverseSize4.h" +#endif +#endif +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_LU_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/MetisSupport b/o-voxel/third_party/eigen/Eigen/MetisSupport new file mode 100644 index 0000000000000000000000000000000000000000..6cf61976b579083508b4825099618582a8adcfa6 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/MetisSupport @@ -0,0 +1,35 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_METISSUPPORT_MODULE_H +#define EIGEN_METISSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { +#include +} + +/** \ingroup Support_modules + * \defgroup MetisSupport_Module MetisSupport module + * + * \code + * #include + * \endcode + * This module defines an interface to the METIS reordering package (http://glaros.dtc.umn.edu/gkhome/views/metis). + * It can be used just as any other built-in method as explained in \link OrderingMethods_Module here. \endlink + */ + +// IWYU pragma: begin_exports +#include "src/MetisSupport/MetisSupport.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_METISSUPPORT_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/OrderingMethods b/o-voxel/third_party/eigen/Eigen/OrderingMethods new file mode 100644 index 0000000000000000000000000000000000000000..0875154f5c72a6262cffc6ff822cf852a6a766af --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/OrderingMethods @@ -0,0 +1,73 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ORDERINGMETHODS_MODULE_H +#define EIGEN_ORDERINGMETHODS_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** + * \defgroup OrderingMethods_Module OrderingMethods module + * + * This module is currently for internal use only + * + * It defines various built-in and external ordering methods for sparse matrices. + * They are typically used to reduce the number of elements during + * the sparse matrix decomposition (LLT, LU, QR). + * Precisely, in a preprocessing step, a permutation matrix P is computed using + * those ordering methods and applied to the columns of the matrix. + * Using for instance the sparse Cholesky decomposition, it is expected that + * the nonzeros elements in LLT(A*P) will be much smaller than that in LLT(A). + * + * + * Usage : + * \code + * #include + * \endcode + * + * A simple usage is as a template parameter in the sparse decomposition classes : + * + * \code + * SparseLU > solver; + * \endcode + * + * \code + * SparseQR > solver; + * \endcode + * + * It is possible as well to call directly a particular ordering method for your own purpose, + * \code + * AMDOrdering ordering; + * PermutationMatrix perm; + * SparseMatrix A; + * //Fill the matrix ... + * + * ordering(A, perm); // Call AMD + * \endcode + * + * \note Some of these methods (like AMD or METIS), need the sparsity pattern + * of the input matrix to be symmetric. When the matrix is structurally unsymmetric, + * Eigen computes internally the pattern of \f$A^T*A\f$ before calling the method. + * If your matrix is already symmetric (at least in structure), you can avoid that + * by calling the method with a SelfAdjointView type. 
+ * + * \code + * // Call the ordering on the pattern of the lower triangular matrix A + * ordering(A.selfadjointView(), perm); + * \endcode + */ + +// IWYU pragma: begin_exports +#include "src/OrderingMethods/Amd.h" +#include "src/OrderingMethods/Ordering.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_ORDERINGMETHODS_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/PaStiXSupport b/o-voxel/third_party/eigen/Eigen/PaStiXSupport new file mode 100644 index 0000000000000000000000000000000000000000..b0b7047aaa09dede87f76f7c0204af04dc6c9dc2 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/PaStiXSupport @@ -0,0 +1,51 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PASTIXSUPPORT_MODULE_H +#define EIGEN_PASTIXSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { +#include +#include +} + +#ifdef complex +#undef complex +#endif + +/** \ingroup Support_modules + * \defgroup PaStiXSupport_Module PaStiXSupport module + * + * This module provides an interface to the PaSTiX library. + * PaSTiX is a general \b supernodal, \b parallel and \b opensource sparse solver. + * It provides the two following main factorization classes: + * - class PastixLLT : a supernodal, parallel LLt Cholesky factorization. + * - class PastixLDLT: a supernodal, parallel LDLt Cholesky factorization. + * - class PastixLU : a supernodal, parallel LU factorization (optimized for a symmetric pattern). + * + * \code + * #include + * \endcode + * + * In order to use this module, the PaSTiX headers must be accessible from the include paths, and your binary must be + * linked to the PaSTiX library and its dependencies. 
This wrapper requires PaStiX version 5.x compiled without MPI + * support. The dependencies depend on how PaSTiX has been compiled. For a cmake based project, you can use our + * FindPaSTiX.cmake module to help you in this task. + * + */ + +// IWYU pragma: begin_exports +#include "src/PaStiXSupport/PaStiXSupport.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_PASTIXSUPPORT_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/PardisoSupport b/o-voxel/third_party/eigen/Eigen/PardisoSupport new file mode 100644 index 0000000000000000000000000000000000000000..8d1e79959701fe867a1c86ae766e0b6423f91674 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/PardisoSupport @@ -0,0 +1,38 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PARDISOSUPPORT_MODULE_H +#define EIGEN_PARDISOSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include + +/** \ingroup Support_modules + * \defgroup PardisoSupport_Module PardisoSupport module + * + * This module brings support for the Intel(R) MKL PARDISO direct sparse solvers. + * + * \code + * #include + * \endcode + * + * In order to use this module, the MKL headers must be accessible from the include paths, and your binary must be + * linked to the MKL library and its dependencies. See this \ref TopicUsingIntelMKL "page" for more information on + * MKL-Eigen integration.
+ * + */ + +// IWYU pragma: begin_exports +#include "src/PardisoSupport/PardisoSupport.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_PARDISOSUPPORT_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/QR b/o-voxel/third_party/eigen/Eigen/QR new file mode 100644 index 0000000000000000000000000000000000000000..35531f5fe6f66117b5b8df15180397ccf816b381 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/QR @@ -0,0 +1,48 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_QR_MODULE_H +#define EIGEN_QR_MODULE_H + +#include "Core" + +#include "Cholesky" +#include "Jacobi" +#include "Householder" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup QR_Module QR module + * + * + * + * This module provides various QR decompositions + * This module also provides some MatrixBase methods, including: + * - MatrixBase::householderQr() + * - MatrixBase::colPivHouseholderQr() + * - MatrixBase::fullPivHouseholderQr() + * + * \code + * #include + * \endcode + */ + +// IWYU pragma: begin_exports +#include "src/QR/HouseholderQR.h" +#include "src/QR/FullPivHouseholderQR.h" +#include "src/QR/ColPivHouseholderQR.h" +#include "src/QR/CompleteOrthogonalDecomposition.h" +#ifdef EIGEN_USE_LAPACKE +#include "src/misc/lapacke_helpers.h" +#include "src/QR/HouseholderQR_LAPACKE.h" +#include "src/QR/ColPivHouseholderQR_LAPACKE.h" +#endif +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_QR_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/QtAlignedMalloc b/o-voxel/third_party/eigen/Eigen/QtAlignedMalloc new file mode 100644 index 
0000000000000000000000000000000000000000..b1138974017b8fb82c71a4f57bf9ad64a45cb7d6 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/QtAlignedMalloc @@ -0,0 +1,39 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_QTMALLOC_MODULE_H +#define EIGEN_QTMALLOC_MODULE_H + +#include "Core" + +#if (!EIGEN_MALLOC_ALREADY_ALIGNED) + +#include "src/Core/util/DisableStupidWarnings.h" + +// qMalloc: returns a block of `size` bytes obtained from Eigen::internal::aligned_malloc. +void *qMalloc(std::size_t size) { return Eigen::internal::aligned_malloc(size); } + +// qFree: hands `ptr` to Eigen::internal::aligned_free. +void qFree(void *ptr) { Eigen::internal::aligned_free(ptr); } + +// qRealloc: allocates a new block of `size` bytes with aligned_malloc, copies `size` bytes from `ptr` into it, +// frees `ptr` and returns the new block. +// NOTE(review): the old block's size is not available here, so when the block grows the copy reads past the end +// of `ptr` -- confirm callers never rely on growing, or track the old size. +void *qRealloc(void *ptr, std::size_t size) { + void *newPtr = Eigen::internal::aligned_malloc(size); + // Skip the copy for a null source or an empty request: std::memcpy must not be given a null pointer. + if (ptr != nullptr && size != 0) std::memcpy(newPtr, ptr, size); + Eigen::internal::aligned_free(ptr); + return newPtr; +} + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif + +#endif // EIGEN_QTMALLOC_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/SPQRSupport b/o-voxel/third_party/eigen/Eigen/SPQRSupport new file mode 100644 index 0000000000000000000000000000000000000000..9a11ceb05226be183e0cadc80f61fbe7fa6bafe3 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/SPQRSupport @@ -0,0 +1,41 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ +#ifndef EIGEN_SPQRSUPPORT_MODULE_H +#define EIGEN_SPQRSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include "SuiteSparseQR.hpp" + +/** \ingroup Support_modules + * \defgroup SPQRSupport_Module SuiteSparseQR module + * + * This module provides an interface to the SPQR library, which is part of the suitesparse package. + * + * \code + * #include + * \endcode + * + * In order to use this module, the SPQR headers must be accessible from the include paths, and your binary must be + * linked to the SPQR library and its dependencies (Cholmod, AMD, COLAMD,...). For a cmake based project, you can use + * our FindSPQR.cmake and FindCholmod.cmake modules. + * + */ + +#include "CholmodSupport" + +// IWYU pragma: begin_exports +#include "src/SPQRSupport/SuiteSparseQRSupport.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif diff --git a/o-voxel/third_party/eigen/Eigen/SVD b/o-voxel/third_party/eigen/Eigen/SVD new file mode 100644 index 0000000000000000000000000000000000000000..a8e88eae4197aa47f0c3e984f863a771e938d9ca --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/SVD @@ -0,0 +1,55 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SVD_MODULE_H +#define EIGEN_SVD_MODULE_H + +#include "QR" +#include "Householder" +#include "Jacobi" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup SVD_Module SVD module + * + * + * + * This module provides SVD decomposition for matrices (both real and complex). + * Two decomposition algorithms are provided: + * - JacobiSVD implementing two-sided Jacobi iterations is numerically very accurate, fast for small matrices, but very + * slow for larger ones.
+ * - BDCSVD implementing a recursive divide & conquer strategy on top of an upper-bidiagonalization which remains fast + * for large problems. These decompositions are accessible via the respective classes and following MatrixBase methods: + * - MatrixBase::jacobiSvd() + * - MatrixBase::bdcSvd() + * + * \code + * #include + * \endcode + */ + +// IWYU pragma: begin_exports +#include "src/SVD/UpperBidiagonalization.h" +#include "src/SVD/SVDBase.h" +#include "src/SVD/JacobiSVD.h" +#include "src/SVD/BDCSVD.h" +#ifdef EIGEN_USE_LAPACKE +#ifdef EIGEN_USE_MKL +#include "mkl_lapacke.h" +#else +#include "src/misc/lapacke.h" +#endif +#ifndef EIGEN_USE_LAPACKE_STRICT +#include "src/SVD/JacobiSVD_LAPACKE.h" +#endif +#include "src/SVD/BDCSVD_LAPACKE.h" +#endif +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SVD_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/Sparse b/o-voxel/third_party/eigen/Eigen/Sparse new file mode 100644 index 0000000000000000000000000000000000000000..3e26d0a21e1cb054ca9606b9817b843dffe8c00b --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/Sparse @@ -0,0 +1,33 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_SPARSE_MODULE_H +#define EIGEN_SPARSE_MODULE_H + +/** \defgroup Sparse_Module Sparse meta-module + * + * Meta-module including all related modules: + * - \ref SparseCore_Module + * - \ref OrderingMethods_Module + * - \ref SparseCholesky_Module + * - \ref SparseLU_Module + * - \ref SparseQR_Module + * - \ref IterativeLinearSolvers_Module + * + \code + #include + \endcode + */ + +#include "SparseCore" +#include "OrderingMethods" +#include "SparseCholesky" +#include "SparseLU" +#include "SparseQR" +#include "IterativeLinearSolvers" + +#endif // EIGEN_SPARSE_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/SparseCholesky b/o-voxel/third_party/eigen/Eigen/SparseCholesky new file mode 100644 index 0000000000000000000000000000000000000000..90c5921ee2e75a5251376dc83176fc09544bb769 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/SparseCholesky @@ -0,0 +1,40 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2013 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSECHOLESKY_MODULE_H +#define EIGEN_SPARSECHOLESKY_MODULE_H + +#include "SparseCore" +#include "OrderingMethods" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** + * \defgroup SparseCholesky_Module SparseCholesky module + * + * This module currently provides two variants of the direct sparse Cholesky decomposition for selfadjoint (hermitian) + * matrices. Those decompositions are accessible via the following classes: + * - SimplicialLLt, + * - SimplicialLDLt + * + * Such problems can also be solved using the ConjugateGradient solver from the IterativeLinearSolvers module. 
+ * + * \code + * #include + * \endcode + */ + +// IWYU pragma: begin_exports +#include "src/SparseCholesky/SimplicialCholesky.h" +#include "src/SparseCholesky/SimplicialCholesky_impl.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SPARSECHOLESKY_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/SparseCore b/o-voxel/third_party/eigen/Eigen/SparseCore new file mode 100644 index 0000000000000000000000000000000000000000..d8116e49b6b8551b61c3de52d2ab2cd96b14343d --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/SparseCore @@ -0,0 +1,70 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSECORE_MODULE_H +#define EIGEN_SPARSECORE_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +#include +#include +#include +#include +#include +#include + +/** + * \defgroup SparseCore_Module SparseCore module + * + * This module provides a sparse matrix representation, and basic associated matrix manipulations + * and operations. + * + * See the \ref TutorialSparse "Sparse tutorial" + * + * \code + * #include + * \endcode + * + * This module depends on: Core. 
+ */ + +// IWYU pragma: begin_exports +#include "src/SparseCore/SparseUtil.h" +#include "src/SparseCore/SparseMatrixBase.h" +#include "src/SparseCore/SparseAssign.h" +#include "src/SparseCore/CompressedStorage.h" +#include "src/SparseCore/AmbiVector.h" +#include "src/SparseCore/SparseCompressedBase.h" +#include "src/SparseCore/SparseMatrix.h" +#include "src/SparseCore/SparseMap.h" +#include "src/SparseCore/SparseVector.h" +#include "src/SparseCore/SparseRef.h" +#include "src/SparseCore/SparseCwiseUnaryOp.h" +#include "src/SparseCore/SparseCwiseBinaryOp.h" +#include "src/SparseCore/SparseTranspose.h" +#include "src/SparseCore/SparseBlock.h" +#include "src/SparseCore/SparseDot.h" +#include "src/SparseCore/SparseRedux.h" +#include "src/SparseCore/SparseView.h" +#include "src/SparseCore/SparseDiagonalProduct.h" +#include "src/SparseCore/ConservativeSparseSparseProduct.h" +#include "src/SparseCore/SparseSparseProductWithPruning.h" +#include "src/SparseCore/SparseProduct.h" +#include "src/SparseCore/SparseDenseProduct.h" +#include "src/SparseCore/SparseSelfAdjointView.h" +#include "src/SparseCore/SparseTriangularView.h" +#include "src/SparseCore/TriangularSolver.h" +#include "src/SparseCore/SparsePermutation.h" +#include "src/SparseCore/SparseFuzzy.h" +#include "src/SparseCore/SparseSolverBase.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SPARSECORE_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/SparseLU b/o-voxel/third_party/eigen/Eigen/SparseLU new file mode 100644 index 0000000000000000000000000000000000000000..c594da1258c8a71b2482e36b2fd92eed25e5c2de --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/SparseLU @@ -0,0 +1,50 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2012 Désiré Nuentsa-Wakam +// Copyright (C) 2012 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSELU_MODULE_H +#define EIGEN_SPARSELU_MODULE_H + +#include "SparseCore" + +/** + * \defgroup SparseLU_Module SparseLU module + * This module defines a supernodal factorization of general sparse matrices. + * The code is fully optimized for supernode-panel updates with specialized kernels. + * Please, see the documentation of the SparseLU class for more details. + */ + +// Ordering interface +#include "OrderingMethods" + +#include "src/Core/util/DisableStupidWarnings.h" + +// IWYU pragma: begin_exports +#include "src/SparseLU/SparseLU_Structs.h" +#include "src/SparseLU/SparseLU_SupernodalMatrix.h" +#include "src/SparseLU/SparseLUImpl.h" +#include "src/SparseCore/SparseColEtree.h" +#include "src/SparseLU/SparseLU_Memory.h" +#include "src/SparseLU/SparseLU_heap_relax_snode.h" +#include "src/SparseLU/SparseLU_relax_snode.h" +#include "src/SparseLU/SparseLU_pivotL.h" +#include "src/SparseLU/SparseLU_panel_dfs.h" +#include "src/SparseLU/SparseLU_kernel_bmod.h" +#include "src/SparseLU/SparseLU_panel_bmod.h" +#include "src/SparseLU/SparseLU_column_dfs.h" +#include "src/SparseLU/SparseLU_column_bmod.h" +#include "src/SparseLU/SparseLU_copy_to_ucol.h" +#include "src/SparseLU/SparseLU_pruneL.h" +#include "src/SparseLU/SparseLU_Utils.h" +#include "src/SparseLU/SparseLU.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SPARSELU_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/SparseQR b/o-voxel/third_party/eigen/Eigen/SparseQR new file mode 100644 index 0000000000000000000000000000000000000000..56390283cdafbfa035b6d0dfa99e365b1de361fe --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/SparseQR @@ -0,0 +1,38 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 
2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SPARSEQR_MODULE_H +#define EIGEN_SPARSEQR_MODULE_H + +#include "SparseCore" +#include "OrderingMethods" +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup SparseQR_Module SparseQR module + * \brief Provides QR decomposition for sparse matrices + * + * This module provides a simplicial version of the left-looking Sparse QR decomposition. + * The columns of the input matrix should be reordered to limit the fill-in during the + * decomposition. Built-in methods (COLAMD, AMD) or external methods (METIS) can be used to this end. + * See the \link OrderingMethods_Module OrderingMethods\endlink module for the list + * of built-in and external ordering methods. + * + * \code + * #include + * \endcode + * + * + */ + +// IWYU pragma: begin_exports +#include "src/SparseCore/SparseColEtree.h" +#include "src/SparseQR/SparseQR.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif diff --git a/o-voxel/third_party/eigen/Eigen/StdDeque b/o-voxel/third_party/eigen/Eigen/StdDeque new file mode 100644 index 0000000000000000000000000000000000000000..e51c37ba1cac348274d467e6a04916286f784d7c --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/StdDeque @@ -0,0 +1,30 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// Copyright (C) 2009 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_STDDEQUE_MODULE_H +#define EIGEN_STDDEQUE_MODULE_H + +#include "Core" +#include + +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && \ + (EIGEN_MAX_STATIC_ALIGN_BYTES <= 16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */ + +#define EIGEN_DEFINE_STL_DEQUE_SPECIALIZATION(...) + +#else + +// IWYU pragma: begin_exports +#include "src/StlSupport/StdDeque.h" +// IWYU pragma: end_exports + +#endif + +#endif // EIGEN_STDDEQUE_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/StdList b/o-voxel/third_party/eigen/Eigen/StdList new file mode 100644 index 0000000000000000000000000000000000000000..5fe6793b2eb12d9e2b81547f0c6e0f5d1b5e0691 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/StdList @@ -0,0 +1,29 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_STDLIST_MODULE_H +#define EIGEN_STDLIST_MODULE_H + +#include "Core" +#include + +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && \ + (EIGEN_MAX_STATIC_ALIGN_BYTES <= 16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */ + +#define EIGEN_DEFINE_STL_LIST_SPECIALIZATION(...) + +#else + +// IWYU pragma: begin_exports +#include "src/StlSupport/StdList.h" +// IWYU pragma: end_exports + +#endif + +#endif // EIGEN_STDLIST_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/StdVector b/o-voxel/third_party/eigen/Eigen/StdVector new file mode 100644 index 0000000000000000000000000000000000000000..80ea165205c33a4ea8f06242e4b2f3ed6012f479 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/StdVector @@ -0,0 +1,30 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2009 Gael Guennebaud +// Copyright (C) 2009 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_STDVECTOR_MODULE_H +#define EIGEN_STDVECTOR_MODULE_H + +#include "Core" +#include + +#if EIGEN_COMP_MSVC && EIGEN_OS_WIN64 && \ + (EIGEN_MAX_STATIC_ALIGN_BYTES <= 16) /* MSVC auto aligns up to 16 bytes in 64 bit builds */ + +#define EIGEN_DEFINE_STL_VECTOR_SPECIALIZATION(...) + +#else + +// IWYU pragma: begin_exports +#include "src/StlSupport/StdVector.h" +// IWYU pragma: end_exports + +#endif + +#endif // EIGEN_STDVECTOR_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/SuperLUSupport b/o-voxel/third_party/eigen/Eigen/SuperLUSupport new file mode 100644 index 0000000000000000000000000000000000000000..6ca63e29b96286601e87bd4f8190deaeb79ebc39 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/SuperLUSupport @@ -0,0 +1,70 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SUPERLUSUPPORT_MODULE_H +#define EIGEN_SUPERLUSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +#ifdef EMPTY +#define EIGEN_EMPTY_WAS_ALREADY_DEFINED +#endif + +typedef int int_t; +#include +#include +#include + +// slu_util.h defines a preprocessor token named EMPTY which is really polluting, +// so we remove it in favor of a SUPERLU_EMPTY token. +// If EMPTY was already defined then we don't undef it. 
+ +#if defined(EIGEN_EMPTY_WAS_ALREADY_DEFINED) +#undef EIGEN_EMPTY_WAS_ALREADY_DEFINED +#elif defined(EMPTY) +#undef EMPTY +#endif + +#define SUPERLU_EMPTY (-1) + +namespace Eigen { +struct SluMatrix; +} + +/** \ingroup Support_modules + * \defgroup SuperLUSupport_Module SuperLUSupport module + * + * This module provides an interface to the SuperLU library. + * It provides the following factorization classes: + * - class SuperLU: a supernodal sequential LU factorization. + * - class SuperILU: a supernodal sequential incomplete LU factorization (to be used as a preconditioner for iterative + * methods). + * + * \warning This wrapper requires at least version 4.0 of SuperLU. The 3.x versions are not supported. + * + * \warning When including this module, you have to use SUPERLU_EMPTY instead of EMPTY which is no longer defined + * because it is too polluting. + * + * \code + * #include + * \endcode + * + * In order to use this module, the superlu headers must be accessible from the include paths, and your binary must be + * linked to the superlu library and its dependencies. The dependencies depend on how superlu has been compiled. For a + * cmake based project, you can use our FindSuperLU.cmake module to help you in this task. + * + */ + +// IWYU pragma: begin_exports +#include "src/SuperLUSupport/SuperLUSupport.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_SUPERLUSUPPORT_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/ThreadPool b/o-voxel/third_party/eigen/Eigen/ThreadPool new file mode 100644 index 0000000000000000000000000000000000000000..cf5f3b2a1ab73f78c2eadf0e14814f8323c7d7bb --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/ThreadPool @@ -0,0 +1,80 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2016 Benoit Steiner +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_THREADPOOL_MODULE_H +#define EIGEN_THREADPOOL_MODULE_H + +#include "Core" + +#include "src/Core/util/DisableStupidWarnings.h" + +/** \defgroup ThreadPool_Module ThreadPool Module + * + * This module provides 2 threadpool implementations + * - a simple reference implementation + * - a faster non blocking implementation + * + * \code + * #include + * \endcode + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// There are non-parenthesized calls to "max" in the header, +// which trigger a check in test/main.h causing compilation to fail. +// We work around the check here by removing the check for max in +// the case where we have to emulate thread_local. +#ifdef max +#undef max +#endif +#include + +#include "src/Core/util/Meta.h" +#include "src/Core/util/MaxSizeVector.h" + +#ifndef EIGEN_MUTEX +#define EIGEN_MUTEX std::mutex +#endif +#ifndef EIGEN_MUTEX_LOCK +#define EIGEN_MUTEX_LOCK std::unique_lock +#endif +#ifndef EIGEN_CONDVAR +#define EIGEN_CONDVAR std::condition_variable +#endif + +// IWYU pragma: begin_exports +#include "src/ThreadPool/ThreadLocal.h" +#include "src/ThreadPool/ThreadYield.h" +#include "src/ThreadPool/ThreadCancel.h" +#include "src/ThreadPool/EventCount.h" +#include "src/ThreadPool/RunQueue.h" +#include "src/ThreadPool/ThreadPoolInterface.h" +#include "src/ThreadPool/ThreadEnvironment.h" +#include "src/ThreadPool/Barrier.h" +#include "src/ThreadPool/NonBlockingThreadPool.h" +#include "src/ThreadPool/CoreThreadPoolDevice.h" +#include "src/ThreadPool/ForkJoin.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_THREADPOOL_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/UmfPackSupport b/o-voxel/third_party/eigen/Eigen/UmfPackSupport new file mode 100644 index 
0000000000000000000000000000000000000000..eeb0b38594e467677720e06c3fd04bc6c2ee85d0 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/UmfPackSupport @@ -0,0 +1,42 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_UMFPACKSUPPORT_MODULE_H +#define EIGEN_UMFPACKSUPPORT_MODULE_H + +#include "SparseCore" + +#include "src/Core/util/DisableStupidWarnings.h" + +extern "C" { +#include +} + +/** \ingroup Support_modules + * \defgroup UmfPackSupport_Module UmfPackSupport module + * + * This module provides an interface to the UmfPack library which is part of the suitesparse package. It provides the following factorization class: + * - class UmfPackLU: a multifrontal sequential LU factorization. + * + * \code + * #include + * \endcode + * + * In order to use this module, the umfpack headers must be accessible from the include paths, and your binary must be + * linked to the umfpack library and its dependencies. The dependencies depend on how umfpack has been compiled. For a + * cmake based project, you can use our FindUmfPack.cmake module to help you in this task. + * + */ + +// IWYU pragma: begin_exports +#include "src/UmfPackSupport/UmfPackSupport.h" +// IWYU pragma: end_exports + +#include "src/Core/util/ReenableStupidWarnings.h" + +#endif // EIGEN_UMFPACKSUPPORT_MODULE_H diff --git a/o-voxel/third_party/eigen/Eigen/Version b/o-voxel/third_party/eigen/Eigen/Version new file mode 100644 index 0000000000000000000000000000000000000000..aadf39f8e15f2fbb74c5816db7a5a684bfad7674 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/Version @@ -0,0 +1,14 @@ +#ifndef EIGEN_VERSION_H +#define EIGEN_VERSION_H + +// The "WORLD" version will forever remain "3" for the "Eigen3" library. 
+#define EIGEN_WORLD_VERSION 3 +// As of Eigen3 5.0.0, we have moved to Semantic Versioning (semver.org). +#define EIGEN_MAJOR_VERSION 5 +#define EIGEN_MINOR_VERSION 0 +#define EIGEN_PATCH_VERSION 1 +#define EIGEN_PRERELEASE_VERSION "dev" +#define EIGEN_BUILD_VERSION "master" +#define EIGEN_VERSION_STRING "5.0.1-dev+master" + +#endif // EIGEN_VERSION_H diff --git a/o-voxel/third_party/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h b/o-voxel/third_party/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h new file mode 100644 index 0000000000000000000000000000000000000000..1ac92cebfd1a5dbad21b0419325b145a65921a20 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/AccelerateSupport/AccelerateSupport.h @@ -0,0 +1,423 @@ +#ifndef EIGEN_ACCELERATESUPPORT_H +#define EIGEN_ACCELERATESUPPORT_H + +#include + +#include + +namespace Eigen { + +template +class AccelerateImpl; + +/** \ingroup AccelerateSupport_Module + * \typedef AccelerateLLT + * \brief A direct Cholesky (LLT) factorization and solver based on Accelerate + * + * \warning Only single and double precision real scalar types are supported by Accelerate + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ additional information about the matrix structure. Default is Lower. + * + * \sa \ref TutorialSparseSolverConcept, class AccelerateLLT + */ +template +using AccelerateLLT = AccelerateImpl; + +/** \ingroup AccelerateSupport_Module + * \typedef AccelerateLDLT + * \brief The default Cholesky (LDLT) factorization and solver based on Accelerate + * + * \warning Only single and double precision real scalar types are supported by Accelerate + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ additional information about the matrix structure. Default is Lower. 
+ * + * \sa \ref TutorialSparseSolverConcept, class AccelerateLDLT + */ +template +using AccelerateLDLT = AccelerateImpl; + +/** \ingroup AccelerateSupport_Module + * \typedef AccelerateLDLTUnpivoted + * \brief A direct Cholesky-like LDL^T factorization and solver based on Accelerate with only 1x1 pivots and no pivoting + * + * \warning Only single and double precision real scalar types are supported by Accelerate + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ additional information about the matrix structure. Default is Lower. + * + * \sa \ref TutorialSparseSolverConcept, class AccelerateLDLTUnpivoted + */ +template +using AccelerateLDLTUnpivoted = AccelerateImpl; + +/** \ingroup AccelerateSupport_Module + * \typedef AccelerateLDLTSBK + * \brief A direct Cholesky (LDLT) factorization and solver based on Accelerate with Supernode Bunch-Kaufman and static + * pivoting + * + * \warning Only single and double precision real scalar types are supported by Accelerate + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ additional information about the matrix structure. Default is Lower. + * + * \sa \ref TutorialSparseSolverConcept, class AccelerateLDLTSBK + */ +template +using AccelerateLDLTSBK = AccelerateImpl; + +/** \ingroup AccelerateSupport_Module + * \typedef AccelerateLDLTTPP + * \brief A direct Cholesky (LDLT) factorization and solver based on Accelerate with full threshold partial pivoting + * + * \warning Only single and double precision real scalar types are supported by Accelerate + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ additional information about the matrix structure. Default is Lower. 
+ * + * \sa \ref TutorialSparseSolverConcept, class AccelerateLDLTTPP + */ +template +using AccelerateLDLTTPP = AccelerateImpl; + +/** \ingroup AccelerateSupport_Module + * \typedef AccelerateQR + * \brief A QR factorization and solver based on Accelerate + * + * \warning Only single and double precision real scalar types are supported by Accelerate + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * + * \sa \ref TutorialSparseSolverConcept, class AccelerateQR + */ +template +using AccelerateQR = AccelerateImpl; + +/** \ingroup AccelerateSupport_Module + * \typedef AccelerateCholeskyAtA + * \brief A QR factorization and solver based on Accelerate without storing Q (equivalent to A^TA = R^T R) + * + * \warning Only single and double precision real scalar types are supported by Accelerate + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * + * \sa \ref TutorialSparseSolverConcept, class AccelerateCholeskyAtA + */ +template +using AccelerateCholeskyAtA = AccelerateImpl; + +namespace internal { +template +struct AccelFactorizationDeleter { + void operator()(T* sym) { + if (sym) { + SparseCleanup(*sym); + delete sym; + sym = nullptr; + } + } +}; + +template +struct SparseTypesTraitBase { + typedef DenseVecT AccelDenseVector; + typedef DenseMatT AccelDenseMatrix; + typedef SparseMatT AccelSparseMatrix; + + typedef SparseOpaqueSymbolicFactorization SymbolicFactorization; + typedef NumFactT NumericFactorization; + + typedef AccelFactorizationDeleter SymbolicFactorizationDeleter; + typedef AccelFactorizationDeleter NumericFactorizationDeleter; +}; + +template +struct SparseTypesTrait {}; + +template <> +struct SparseTypesTrait : SparseTypesTraitBase {}; + +template <> +struct SparseTypesTrait + : SparseTypesTraitBase { +}; + +} // end namespace internal + +template +class AccelerateImpl : public SparseSolverBase > { + protected: + using Base = SparseSolverBase; + using Base::derived; + using 
Base::m_isInitialized; + + public: + using Base::_solve_impl; + + typedef MatrixType_ MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::StorageIndex StorageIndex; + enum { ColsAtCompileTime = Dynamic, MaxColsAtCompileTime = Dynamic }; + enum { UpLo = UpLo_ }; + + using AccelDenseVector = typename internal::SparseTypesTrait::AccelDenseVector; + using AccelDenseMatrix = typename internal::SparseTypesTrait::AccelDenseMatrix; + using AccelSparseMatrix = typename internal::SparseTypesTrait::AccelSparseMatrix; + using SymbolicFactorization = typename internal::SparseTypesTrait::SymbolicFactorization; + using NumericFactorization = typename internal::SparseTypesTrait::NumericFactorization; + using SymbolicFactorizationDeleter = typename internal::SparseTypesTrait::SymbolicFactorizationDeleter; + using NumericFactorizationDeleter = typename internal::SparseTypesTrait::NumericFactorizationDeleter; + + AccelerateImpl() { + m_isInitialized = false; + + auto check_flag_set = [](int value, int flag) { return ((value & flag) == flag); }; + + if (check_flag_set(UpLo_, Symmetric)) { + m_sparseKind = SparseSymmetric; + m_triType = (UpLo_ & Lower) ? 
SparseLowerTriangle : SparseUpperTriangle; + } else if (check_flag_set(UpLo_, UnitLower)) { + m_sparseKind = SparseUnitTriangular; + m_triType = SparseLowerTriangle; + } else if (check_flag_set(UpLo_, UnitUpper)) { + m_sparseKind = SparseUnitTriangular; + m_triType = SparseUpperTriangle; + } else if (check_flag_set(UpLo_, StrictlyLower)) { + m_sparseKind = SparseTriangular; + m_triType = SparseLowerTriangle; + } else if (check_flag_set(UpLo_, StrictlyUpper)) { + m_sparseKind = SparseTriangular; + m_triType = SparseUpperTriangle; + } else if (check_flag_set(UpLo_, Lower)) { + m_sparseKind = SparseTriangular; + m_triType = SparseLowerTriangle; + } else if (check_flag_set(UpLo_, Upper)) { + m_sparseKind = SparseTriangular; + m_triType = SparseUpperTriangle; + } else { + m_sparseKind = SparseOrdinary; + m_triType = (UpLo_ & Lower) ? SparseLowerTriangle : SparseUpperTriangle; + } + + m_order = SparseOrderDefault; + } + + explicit AccelerateImpl(const MatrixType& matrix) : AccelerateImpl() { compute(matrix); } + + ~AccelerateImpl() {} + + inline Index cols() const { return m_nCols; } + inline Index rows() const { return m_nRows; } + + ComputationInfo info() const { + eigen_assert(m_isInitialized && "Decomposition is not initialized."); + return m_info; + } + + void analyzePattern(const MatrixType& matrix); + + void factorize(const MatrixType& matrix); + + void compute(const MatrixType& matrix); + + template + void _solve_impl(const MatrixBase& b, MatrixBase& dest) const; + + /** Sets the ordering algorithm to use. 
*/ + void setOrder(SparseOrder_t order) { m_order = order; } + + private: + template + void buildAccelSparseMatrix(const SparseMatrix& a, AccelSparseMatrix& A, std::vector& columnStarts) { + const Index nColumnsStarts = a.cols() + 1; + + columnStarts.resize(nColumnsStarts); + + for (Index i = 0; i < nColumnsStarts; i++) columnStarts[i] = a.outerIndexPtr()[i]; + + SparseAttributes_t attributes{}; + attributes.transpose = false; + attributes.triangle = m_triType; + attributes.kind = m_sparseKind; + + SparseMatrixStructure structure{}; + structure.attributes = attributes; + structure.rowCount = static_cast(a.rows()); + structure.columnCount = static_cast(a.cols()); + structure.blockSize = 1; + structure.columnStarts = columnStarts.data(); + structure.rowIndices = const_cast(a.innerIndexPtr()); + + A.structure = structure; + A.data = const_cast(a.valuePtr()); + } + + void doAnalysis(AccelSparseMatrix& A) { + m_numericFactorization.reset(nullptr); + + SparseSymbolicFactorOptions opts{}; + opts.control = SparseDefaultControl; + opts.orderMethod = m_order; + opts.order = nullptr; + opts.ignoreRowsAndColumns = nullptr; + opts.malloc = malloc; + opts.free = free; + opts.reportError = nullptr; + + m_symbolicFactorization.reset(new SymbolicFactorization(SparseFactor(Solver_, A.structure, opts))); + + SparseStatus_t status = m_symbolicFactorization->status; + + updateInfoStatus(status); + + if (status != SparseStatusOK) m_symbolicFactorization.reset(nullptr); + } + + void doFactorization(AccelSparseMatrix& A) { + SparseStatus_t status = SparseStatusReleased; + + if (m_symbolicFactorization) { + m_numericFactorization.reset(new NumericFactorization(SparseFactor(*m_symbolicFactorization, A))); + + status = m_numericFactorization->status; + + if (status != SparseStatusOK) m_numericFactorization.reset(nullptr); + } + + updateInfoStatus(status); + } + + protected: + void updateInfoStatus(SparseStatus_t status) const { + switch (status) { + case SparseStatusOK: + m_info = Success; + 
break; + case SparseFactorizationFailed: + case SparseMatrixIsSingular: + m_info = NumericalIssue; + break; + case SparseInternalError: + case SparseParameterError: + case SparseStatusReleased: + default: + m_info = InvalidInput; + break; + } + } + + mutable ComputationInfo m_info; + Index m_nRows, m_nCols; + std::unique_ptr m_symbolicFactorization; + std::unique_ptr m_numericFactorization; + SparseKind_t m_sparseKind; + SparseTriangle_t m_triType; + SparseOrder_t m_order; +}; + +/** Computes the symbolic and numeric decomposition of matrix \a a */ +template +void AccelerateImpl::compute(const MatrixType& a) { + if (EnforceSquare_) eigen_assert(a.rows() == a.cols()); + + m_nRows = a.rows(); + m_nCols = a.cols(); + + AccelSparseMatrix A{}; + std::vector columnStarts; + + buildAccelSparseMatrix(a, A, columnStarts); + + doAnalysis(A); + + if (m_symbolicFactorization) doFactorization(A); + + m_isInitialized = true; +} + +/** Performs a symbolic decomposition on the sparsity pattern of matrix \a a. + * + * This function is particularly useful when solving for several problems having the same structure. + * + * \sa factorize() + */ +template +void AccelerateImpl::analyzePattern(const MatrixType& a) { + if (EnforceSquare_) eigen_assert(a.rows() == a.cols()); + + m_nRows = a.rows(); + m_nCols = a.cols(); + + AccelSparseMatrix A{}; + std::vector columnStarts; + + buildAccelSparseMatrix(a, A, columnStarts); + + doAnalysis(A); + + m_isInitialized = true; +} + +/** Performs a numeric decomposition of matrix \a a. + * + * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been + * performed. 
+ * + * \sa analyzePattern() + */ +template +void AccelerateImpl::factorize(const MatrixType& a) { + eigen_assert(m_symbolicFactorization && "You must first call analyzePattern()"); + eigen_assert(m_nRows == a.rows() && m_nCols == a.cols()); + + if (EnforceSquare_) eigen_assert(a.rows() == a.cols()); + + AccelSparseMatrix A{}; + std::vector columnStarts; + + buildAccelSparseMatrix(a, A, columnStarts); + + doFactorization(A); +} + +template +template +void AccelerateImpl::_solve_impl(const MatrixBase& b, + MatrixBase& x) const { + if (!m_numericFactorization) { + m_info = InvalidInput; + return; + } + + eigen_assert(m_nRows == b.rows()); + eigen_assert(((b.cols() == 1) || b.outerStride() == b.rows())); + + SparseStatus_t status = SparseStatusOK; + + Scalar* b_ptr = const_cast(b.derived().data()); + Scalar* x_ptr = const_cast(x.derived().data()); + + AccelDenseMatrix xmat{}; + xmat.attributes = SparseAttributes_t(); + xmat.columnCount = static_cast(x.cols()); + xmat.rowCount = static_cast(x.rows()); + xmat.columnStride = xmat.rowCount; + xmat.data = x_ptr; + + AccelDenseMatrix bmat{}; + bmat.attributes = SparseAttributes_t(); + bmat.columnCount = static_cast(b.cols()); + bmat.rowCount = static_cast(b.rows()); + bmat.columnStride = bmat.rowCount; + bmat.data = b_ptr; + + SparseSolve(*m_numericFactorization, bmat, xmat); + + updateInfoStatus(status); +} + +} // end namespace Eigen + +#endif // EIGEN_ACCELERATESUPPORT_H diff --git a/o-voxel/third_party/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h b/o-voxel/third_party/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h new file mode 100644 index 0000000000000000000000000000000000000000..85ea3166fbe59f4890b412c9f73fbbdb785f913b --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/AccelerateSupport/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_ACCELERATESUPPORT_MODULE_H +#error "Please include Eigen/AccelerateSupport instead of including headers inside the src directory directly." 
+#endif diff --git a/o-voxel/third_party/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h b/o-voxel/third_party/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h new file mode 100644 index 0000000000000000000000000000000000000000..31348090c3ce455dcebadb54acc4ea148b6b71af --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Cholesky/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_CHOLESKY_MODULE_H +#error "Please include Eigen/Cholesky instead of including headers inside the src directory directly." +#endif diff --git a/o-voxel/third_party/eigen/Eigen/src/Cholesky/LDLT.h b/o-voxel/third_party/eigen/Eigen/src/Cholesky/LDLT.h new file mode 100644 index 0000000000000000000000000000000000000000..46100b20a275d066d50a9e3c2ccf03a8f3517d86 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Cholesky/LDLT.h @@ -0,0 +1,649 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2011 Gael Guennebaud +// Copyright (C) 2009 Keir Mierle +// Copyright (C) 2009 Benoit Jacob +// Copyright (C) 2011 Timothy E. Holy +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_LDLT_H +#define EIGEN_LDLT_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { +template +struct traits > : traits { + typedef MatrixXpr XprKind; + typedef SolverStorage StorageKind; + typedef int StorageIndex; + enum { Flags = 0 }; +}; + +template +struct LDLT_Traits; + +// PositiveSemiDef means positive semi-definite and non-zero; same for NegativeSemiDef +enum SignMatrix { PositiveSemiDef, NegativeSemiDef, ZeroSign, Indefinite }; +} // namespace internal + +/** \ingroup Cholesky_Module + * + * \class LDLT + * + * \brief Robust Cholesky decomposition of a matrix with pivoting + * + * \tparam MatrixType_ the type of the matrix of which to compute the LDL^T Cholesky decomposition + * \tparam UpLo_ the triangular part that will be used for the decomposition: Lower (default) or Upper. + * The other triangular part won't be read. + * + * Perform a robust Cholesky decomposition of a positive semidefinite or negative semidefinite + * matrix \f$ A \f$ such that \f$ A = P^TLDL^*P \f$, where P is a permutation matrix, L + * is lower triangular with a unit diagonal and D is a diagonal matrix. + * + * The decomposition uses pivoting to ensure stability, so that D will have + * zeros in the bottom right rank(A) - n submatrix. Avoiding the square root + * on D also stabilizes the computation. + * + * Remember that Cholesky decompositions are not rank-revealing. Also, do not use a Cholesky + * decomposition to determine whether a system of equations has a solution. + * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. 
+ * + * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt(), class LLT + */ +template +class LDLT : public SolverBase > { + public: + typedef MatrixType_ MatrixType; + typedef SolverBase Base; + friend class SolverBase; + + EIGEN_GENERIC_PUBLIC_INTERFACE(LDLT) + enum { + MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, + UpLo = UpLo_ + }; + typedef Matrix TmpMatrixType; + + typedef Transpositions TranspositionType; + typedef PermutationMatrix PermutationType; + + typedef internal::LDLT_Traits Traits; + + /** \brief Default Constructor. + * + * The default constructor is useful in cases in which the user intends to + * perform decompositions via LDLT::compute(const MatrixType&). + */ + LDLT() : m_matrix(), m_transpositions(), m_sign(internal::ZeroSign), m_isInitialized(false) {} + + /** \brief Default Constructor with memory preallocation + * + * Like the default constructor but with preallocation of the internal data + * according to the specified problem \a size. + * \sa LDLT() + */ + explicit LDLT(Index size) + : m_matrix(size, size), + m_transpositions(size), + m_temporary(size), + m_sign(internal::ZeroSign), + m_isInitialized(false) {} + + /** \brief Constructor with decomposition + * + * This calculates the decomposition for the input \a matrix. + * + * \sa LDLT(Index size) + */ + template + explicit LDLT(const EigenBase& matrix) + : m_matrix(matrix.rows(), matrix.cols()), + m_transpositions(matrix.rows()), + m_temporary(matrix.rows()), + m_sign(internal::ZeroSign), + m_isInitialized(false) { + compute(matrix.derived()); + } + + /** \brief Constructs a LDLT factorization from a given matrix + * + * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when \c + * MatrixType is a Eigen::Ref. 
+ * + * \sa LDLT(const EigenBase&) + */ + template + explicit LDLT(EigenBase& matrix) + : m_matrix(matrix.derived()), + m_transpositions(matrix.rows()), + m_temporary(matrix.rows()), + m_sign(internal::ZeroSign), + m_isInitialized(false) { + compute(matrix.derived()); + } + + /** Clear any existing decomposition + * \sa rankUpdate(w,sigma) + */ + void setZero() { m_isInitialized = false; } + + /** \returns a view of the upper triangular matrix U */ + inline typename Traits::MatrixU matrixU() const { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return Traits::getU(m_matrix); + } + + /** \returns a view of the lower triangular matrix L */ + inline typename Traits::MatrixL matrixL() const { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return Traits::getL(m_matrix); + } + + /** \returns the permutation matrix P as a transposition sequence. + */ + inline const TranspositionType& transpositionsP() const { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_transpositions; + } + + /** \returns the coefficients of the diagonal matrix D */ + inline Diagonal vectorD() const { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_matrix.diagonal(); + } + + /** \returns true if the matrix is positive (semidefinite) */ + inline bool isPositive() const { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_sign == internal::PositiveSemiDef || m_sign == internal::ZeroSign; + } + + /** \returns true if the matrix is negative (semidefinite) */ + inline bool isNegative(void) const { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_sign == internal::NegativeSemiDef || m_sign == internal::ZeroSign; + } + +#ifdef EIGEN_PARSED_BY_DOXYGEN + /** \returns a solution x of \f$ A x = b \f$ using the current decomposition of A. + * + * This function also supports in-place solves using the syntax x = decompositionObject.solve(x) . 
+ * + * \note_about_checking_solutions + * + * More precisely, this method solves \f$ A x = b \f$ using the decomposition \f$ A = P^T L D L^* P \f$ + * by solving the systems \f$ P^T y_1 = b \f$, \f$ L y_2 = y_1 \f$, \f$ D y_3 = y_2 \f$, + * \f$ L^* y_4 = y_3 \f$ and \f$ P x = y_4 \f$ in succession. If the matrix \f$ A \f$ is singular, then + * \f$ D \f$ will also be singular (all the other matrices are invertible). In that case, the + * least-square solution of \f$ D y_3 = y_2 \f$ is computed. This does not mean that this function + * computes the least-square solution of \f$ A x = b \f$ if \f$ A \f$ is singular. + * + * \sa MatrixBase::ldlt(), SelfAdjointView::ldlt() + */ + template + inline const Solve solve(const MatrixBase& b) const; +#endif + + template + bool solveInPlace(MatrixBase& bAndX) const; + + template + LDLT& compute(const EigenBase& matrix); + + /** \returns an estimate of the reciprocal condition number of the matrix of + * which \c *this is the LDLT decomposition. + */ + RealScalar rcond() const { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return internal::rcond_estimate_helper(m_l1_norm, *this); + } + + template + LDLT& rankUpdate(const MatrixBase& w, const RealScalar& alpha = 1); + + /** \returns the internal LDLT decomposition matrix + * + * TODO: document the storage layout + */ + inline const MatrixType& matrixLDLT() const { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_matrix; + } + + MatrixType reconstructedMatrix() const; + + /** \returns the adjoint of \c *this, that is, a const reference to the decomposition itself as the underlying matrix + * is self-adjoint. 
+ * + * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as: + * \code x = decomposition.adjoint().solve(b) \endcode + */ + const LDLT& adjoint() const { return *this; } + + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_matrix.cols(); } + + /** \brief Reports whether previous computation was successful. + * + * \returns \c Success if computation was successful, + * \c NumericalIssue if the factorization failed because of a zero pivot. + */ + ComputationInfo info() const { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + return m_info; + } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + template + void _solve_impl(const RhsType& rhs, DstType& dst) const; + + template + void _solve_impl_transposed(const RhsType& rhs, DstType& dst) const; +#endif + + protected: + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + + /** \internal + * Used to compute and store the Cholesky decomposition A = L D L^* = U^* D U. + * The strict upper part is used during the decomposition, the strict lower + * part correspond to the coefficients of L (its diagonal is equal to 1 and + * is not stored), and the diagonal entries correspond to D. 
+ */ + MatrixType m_matrix; + RealScalar m_l1_norm; + TranspositionType m_transpositions; + TmpMatrixType m_temporary; + internal::SignMatrix m_sign; + bool m_isInitialized; + ComputationInfo m_info; +}; + +namespace internal { + +template +struct ldlt_inplace; + +template <> +struct ldlt_inplace { + template + static bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, SignMatrix& sign) { + using std::abs; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename TranspositionType::StorageIndex IndexType; + eigen_assert(mat.rows() == mat.cols()); + const Index size = mat.rows(); + bool found_zero_pivot = false; + bool ret = true; + + if (size <= 1) { + transpositions.setIdentity(); + if (size == 0) + sign = ZeroSign; + else if (numext::real(mat.coeff(0, 0)) > static_cast(0)) + sign = PositiveSemiDef; + else if (numext::real(mat.coeff(0, 0)) < static_cast(0)) + sign = NegativeSemiDef; + else + sign = ZeroSign; + return true; + } + + for (Index k = 0; k < size; ++k) { + // Find largest diagonal element + Index index_of_biggest_in_corner; + mat.diagonal().tail(size - k).cwiseAbs().maxCoeff(&index_of_biggest_in_corner); + index_of_biggest_in_corner += k; + + transpositions.coeffRef(k) = IndexType(index_of_biggest_in_corner); + if (k != index_of_biggest_in_corner) { + // apply the transposition while taking care to consider only + // the lower triangular part + Index s = size - index_of_biggest_in_corner - 1; // trailing size after the biggest element + mat.row(k).head(k).swap(mat.row(index_of_biggest_in_corner).head(k)); + mat.col(k).tail(s).swap(mat.col(index_of_biggest_in_corner).tail(s)); + std::swap(mat.coeffRef(k, k), mat.coeffRef(index_of_biggest_in_corner, index_of_biggest_in_corner)); + for (Index i = k + 1; i < index_of_biggest_in_corner; ++i) { + Scalar tmp = mat.coeffRef(i, k); + mat.coeffRef(i, k) = numext::conj(mat.coeffRef(index_of_biggest_in_corner, i)); + 
mat.coeffRef(index_of_biggest_in_corner, i) = numext::conj(tmp); + } + if (NumTraits::IsComplex) + mat.coeffRef(index_of_biggest_in_corner, k) = numext::conj(mat.coeff(index_of_biggest_in_corner, k)); + } + + // partition the matrix: + // A00 | - | - + // lu = A10 | A11 | - + // A20 | A21 | A22 + Index rs = size - k - 1; + Block A21(mat, k + 1, k, rs, 1); + Block A10(mat, k, 0, 1, k); + Block A20(mat, k + 1, 0, rs, k); + + if (k > 0) { + temp.head(k) = mat.diagonal().real().head(k).asDiagonal() * A10.adjoint(); + mat.coeffRef(k, k) -= (A10 * temp.head(k)).value(); + if (rs > 0) A21.noalias() -= A20 * temp.head(k); + } + + // In some previous versions of Eigen (e.g., 3.2.1), the scaling was omitted if the pivot + // was smaller than the cutoff value. However, since LDLT is not rank-revealing + // we should only make sure that we do not introduce INF or NaN values. + // Remark that LAPACK also uses 0 as the cutoff value. + RealScalar realAkk = numext::real(mat.coeffRef(k, k)); + bool pivot_is_valid = (abs(realAkk) > RealScalar(0)); + + if (k == 0 && !pivot_is_valid) { + // The entire diagonal is zero, there is nothing more to do + // except filling the transpositions, and checking whether the matrix is zero. 
+ sign = ZeroSign; + for (Index j = 0; j < size; ++j) { + transpositions.coeffRef(j) = IndexType(j); + ret = ret && (mat.col(j).tail(size - j - 1).array() == Scalar(0)).all(); + } + return ret; + } + + if ((rs > 0) && pivot_is_valid) + A21 /= realAkk; + else if (rs > 0) + ret = ret && (A21.array() == Scalar(0)).all(); + + if (found_zero_pivot && pivot_is_valid) + ret = false; // factorization failed + else if (!pivot_is_valid) + found_zero_pivot = true; + + if (sign == PositiveSemiDef) { + if (realAkk < static_cast(0)) sign = Indefinite; + } else if (sign == NegativeSemiDef) { + if (realAkk > static_cast(0)) sign = Indefinite; + } else if (sign == ZeroSign) { + if (realAkk > static_cast(0)) + sign = PositiveSemiDef; + else if (realAkk < static_cast(0)) + sign = NegativeSemiDef; + } + } + + return ret; + } + + // Reference for the algorithm: Davis and Hager, "Multiple Rank + // Modifications of a Sparse Cholesky Factorization" (Algorithm 1) + // Trivial rearrangements of their computations (Timothy E. Holy) + // allow their algorithm to work for rank-1 updates even if the + // original matrix is not of full rank. 
+ // Here only rank-1 updates are implemented, to reduce the + // requirement for intermediate storage and improve accuracy + template + static bool updateInPlace(MatrixType& mat, MatrixBase& w, + const typename MatrixType::RealScalar& sigma = 1) { + using numext::isfinite; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + + const Index size = mat.rows(); + eigen_assert(mat.cols() == size && w.size() == size); + + RealScalar alpha = 1; + + // Apply the update + for (Index j = 0; j < size; j++) { + // Check for termination due to an original decomposition of low-rank + if (!(isfinite)(alpha)) break; + + // Update the diagonal terms + RealScalar dj = numext::real(mat.coeff(j, j)); + Scalar wj = w.coeff(j); + RealScalar swj2 = sigma * numext::abs2(wj); + RealScalar gamma = dj * alpha + swj2; + + mat.coeffRef(j, j) += swj2 / alpha; + alpha += swj2 / dj; + + // Update the terms of L + Index rs = size - j - 1; + w.tail(rs) -= wj * mat.col(j).tail(rs); + if (!numext::is_exactly_zero(gamma)) mat.col(j).tail(rs) += (sigma * numext::conj(wj) / gamma) * w.tail(rs); + } + return true; + } + + template + static bool update(MatrixType& mat, const TranspositionType& transpositions, Workspace& tmp, const WType& w, + const typename MatrixType::RealScalar& sigma = 1) { + // Apply the permutation to the input w + tmp = transpositions * w; + + return ldlt_inplace::updateInPlace(mat, tmp, sigma); + } +}; + +template <> +struct ldlt_inplace { + template + static EIGEN_STRONG_INLINE bool unblocked(MatrixType& mat, TranspositionType& transpositions, Workspace& temp, + SignMatrix& sign) { + Transpose matt(mat); + return ldlt_inplace::unblocked(matt, transpositions, temp, sign); + } + + template + static EIGEN_STRONG_INLINE bool update(MatrixType& mat, TranspositionType& transpositions, Workspace& tmp, WType& w, + const typename MatrixType::RealScalar& sigma = 1) { + Transpose matt(mat); + return ldlt_inplace::update(matt, transpositions, 
tmp, w.conjugate(), sigma); + } +}; + +template +struct LDLT_Traits { + typedef const TriangularView MatrixL; + typedef const TriangularView MatrixU; + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); } +}; + +template +struct LDLT_Traits { + typedef const TriangularView MatrixL; + typedef const TriangularView MatrixU; + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); } +}; + +} // end namespace internal + +/** Compute / recompute the LDLT decomposition A = L D L^* = U^* D U of \a matrix + */ +template +template +LDLT& LDLT::compute(const EigenBase& a) { + eigen_assert(a.rows() == a.cols()); + const Index size = a.rows(); + + m_matrix = a.derived(); + + // Compute matrix L1 norm = max abs column sum. + m_l1_norm = RealScalar(0); + // TODO move this code to SelfAdjointView + for (Index col = 0; col < size; ++col) { + RealScalar abs_col_sum; + if (UpLo_ == Lower) + abs_col_sum = + m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>(); + else + abs_col_sum = + m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>(); + if (abs_col_sum > m_l1_norm) m_l1_norm = abs_col_sum; + } + + m_transpositions.resize(size); + m_isInitialized = false; + m_temporary.resize(size); + m_sign = internal::ZeroSign; + + m_info = internal::ldlt_inplace::unblocked(m_matrix, m_transpositions, m_temporary, m_sign) ? Success + : NumericalIssue; + + m_isInitialized = true; + return *this; +} + +/** Update the LDLT decomposition: given A = L D L^T, efficiently compute the decomposition of A + sigma w w^T. + * \param w a vector to be incorporated into the decomposition. 
+ * \param sigma a scalar, +1 for updates and -1 for "downdates," which correspond to removing previously-added column + * vectors. Optional; default value is +1. \sa setZero() + */ +template +template +LDLT& LDLT::rankUpdate( + const MatrixBase& w, const typename LDLT::RealScalar& sigma) { + typedef typename TranspositionType::StorageIndex IndexType; + const Index size = w.rows(); + if (m_isInitialized) { + eigen_assert(m_matrix.rows() == size); + } else { + m_matrix.resize(size, size); + m_matrix.setZero(); + m_transpositions.resize(size); + for (Index i = 0; i < size; i++) m_transpositions.coeffRef(i) = IndexType(i); + m_temporary.resize(size); + m_sign = sigma >= 0 ? internal::PositiveSemiDef : internal::NegativeSemiDef; + m_isInitialized = true; + } + + internal::ldlt_inplace::update(m_matrix, m_transpositions, m_temporary, w, sigma); + + return *this; +} + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +template +void LDLT::_solve_impl(const RhsType& rhs, DstType& dst) const { + _solve_impl_transposed(rhs, dst); +} + +template +template +void LDLT::_solve_impl_transposed(const RhsType& rhs, DstType& dst) const { + // dst = P b + dst = m_transpositions * rhs; + + // dst = L^-1 (P b) + // dst = L^-*T (P b) + matrixL().template conjugateIf().solveInPlace(dst); + + // dst = D^-* (L^-1 P b) + // dst = D^-1 (L^-*T P b) + // more precisely, use pseudo-inverse of D (see bug 241) + using std::abs; + const typename Diagonal::RealReturnType vecD(vectorD()); + // In some previous versions, tolerance was set to the max of 1/highest (or rather numeric_limits::min()) + // and the maximal diagonal entry * epsilon as motivated by LAPACK's xGELSS: + // RealScalar tolerance = numext::maxi(vecD.array().abs().maxCoeff() * NumTraits::epsilon(),RealScalar(1) + // / NumTraits::highest()); However, LDLT is not rank revealing, and so adjusting the tolerance wrt to the + // highest diagonal element is not well justified and leads to numerical issues in some cases. 
Moreover, Lapack's + // xSYTRS routines use 0 for the tolerance. Using numeric_limits::min() gives us more robustness to denormals. + RealScalar tolerance = (std::numeric_limits::min)(); + for (Index i = 0; i < vecD.size(); ++i) { + if (abs(vecD(i)) > tolerance) + dst.row(i) /= vecD(i); + else + dst.row(i).setZero(); + } + + // dst = L^-* (D^-* L^-1 P b) + // dst = L^-T (D^-1 L^-*T P b) + matrixL().transpose().template conjugateIf().solveInPlace(dst); + + // dst = P^T (L^-* D^-* L^-1 P b) = A^-1 b + // dst = P^-T (L^-T D^-1 L^-*T P b) = A^-1 b + dst = m_transpositions.transpose() * dst; +} +#endif + +/** \internal use x = ldlt_object.solve(x); + * + * This is the \em in-place version of solve(). + * + * \param bAndX represents both the right-hand side matrix b and result x. + * + * \returns true always! If you need to check for existence of solutions, use another decomposition like LU, QR, or SVD. + * + * This version avoids a copy when the right hand side matrix b is not + * needed anymore. + * + * \sa LDLT::solve(), MatrixBase::ldlt() + */ +template +template +bool LDLT::solveInPlace(MatrixBase& bAndX) const { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + eigen_assert(m_matrix.rows() == bAndX.rows()); + + bAndX = this->solve(bAndX); + + return true; +} + +/** \returns the matrix represented by the decomposition, + * i.e., it returns the product: P^T L D L^* P. + * This function is provided for debug purpose. 
*/ +template +MatrixType LDLT::reconstructedMatrix() const { + eigen_assert(m_isInitialized && "LDLT is not initialized."); + const Index size = m_matrix.rows(); + MatrixType res(size, size); + + // P + res.setIdentity(); + res = transpositionsP() * res; + // L^* P + res = matrixU() * res; + // D(L^*P) + res = vectorD().real().asDiagonal() * res; + // L(DL^*P) + res = matrixL() * res; + // P^T (LDL^*P) + res = transpositionsP().transpose() * res; + + return res; +} + +/** \cholesky_module + * \returns the Cholesky decomposition with full pivoting without square root of \c *this + * \sa MatrixBase::ldlt() + */ +template +inline const LDLT::PlainObject, UpLo> +SelfAdjointView::ldlt() const { + return LDLT(m_matrix); +} + +/** \cholesky_module + * \returns the Cholesky decomposition with full pivoting without square root of \c *this + * \sa SelfAdjointView::ldlt() + */ +template +inline const LDLT::PlainObject> MatrixBase::ldlt() const { + return LDLT(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_LDLT_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Cholesky/LLT.h b/o-voxel/third_party/eigen/Eigen/src/Cholesky/LLT.h new file mode 100644 index 0000000000000000000000000000000000000000..c840b78634843a02c067a0e899c083ede3972f15 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Cholesky/LLT.h @@ -0,0 +1,514 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_LLT_H +#define EIGEN_LLT_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template +struct traits > : traits { + typedef MatrixXpr XprKind; + typedef SolverStorage StorageKind; + typedef int StorageIndex; + enum { Flags = 0 }; +}; + +template +struct LLT_Traits; +} // namespace internal + +/** \ingroup Cholesky_Module + * + * \class LLT + * + * \brief Standard Cholesky decomposition (LL^T) of a matrix and associated features + * + * \tparam MatrixType_ the type of the matrix of which we are computing the LL^T Cholesky decomposition + * \tparam UpLo_ the triangular part that will be used for the decomposition: Lower (default) or Upper. + * The other triangular part won't be read. + * + * This class performs a LL^T Cholesky decomposition of a symmetric, positive definite + * matrix A such that A = LL^* = U^*U, where L is lower triangular. + * + * While the Cholesky decomposition is particularly useful to solve selfadjoint problems like D^*D x = b, + * for that purpose, we recommend the Cholesky decomposition without square root which is more stable + * and even faster. Nevertheless, this standard Cholesky decomposition remains useful in many other + * situations like generalised eigen problems with hermitian matrices. + * + * Remember that Cholesky decompositions are not rank-revealing. This LLT decomposition is only stable on positive + * definite matrices, use LDLT instead for the semidefinite case. Also, do not use a Cholesky decomposition to determine + * whether a system of equations has a solution. + * + * Example: \include LLT_example.cpp + * Output: \verbinclude LLT_example.out + * + * \b Performance: for best performance, it is recommended to use a column-major storage format + * with the Lower triangular part (the default), or, equivalently, a row-major storage format + * with the Upper triangular part. 
Otherwise, you might get a 20% slowdown for the full factorization + * step, and rank-updates can be up to 3 times slower. + * + * This class supports the \link InplaceDecomposition inplace decomposition \endlink mechanism. + * + * Note that during the decomposition, only the lower (or upper, as defined by UpLo_) triangular part of A is + * considered. Therefore, the strict lower part does not have to store correct values. + * + * \sa MatrixBase::llt(), SelfAdjointView::llt(), class LDLT + */ +template +class LLT : public SolverBase > { + public: + typedef MatrixType_ MatrixType; + typedef SolverBase Base; + friend class SolverBase; + + EIGEN_GENERIC_PUBLIC_INTERFACE(LLT) + enum { MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; + + enum { PacketSize = internal::packet_traits::size, AlignmentMask = int(PacketSize) - 1, UpLo = UpLo_ }; + + typedef internal::LLT_Traits Traits; + + /** + * \brief Default Constructor. + * + * The default constructor is useful in cases in which the user intends to + * perform decompositions via LLT::compute(const MatrixType&). + */ + LLT() : m_matrix(), m_isInitialized(false) {} + + /** \brief Default Constructor with memory preallocation + * + * Like the default constructor but with preallocation of the internal data + * according to the specified problem \a size. + * \sa LLT() + */ + explicit LLT(Index size) : m_matrix(size, size), m_isInitialized(false) {} + + template + explicit LLT(const EigenBase& matrix) : m_matrix(matrix.rows(), matrix.cols()), m_isInitialized(false) { + compute(matrix.derived()); + } + + /** \brief Constructs a LLT factorization from a given matrix + * + * This overloaded constructor is provided for \link InplaceDecomposition inplace decomposition \endlink when + * \c MatrixType is a Eigen::Ref. 
+ * + * \sa LLT(const EigenBase&) + */ + template + explicit LLT(EigenBase& matrix) : m_matrix(matrix.derived()), m_isInitialized(false) { + compute(matrix.derived()); + } + + /** \returns a view of the upper triangular matrix U */ + inline typename Traits::MatrixU matrixU() const { + eigen_assert(m_isInitialized && "LLT is not initialized."); + return Traits::getU(m_matrix); + } + + /** \returns a view of the lower triangular matrix L */ + inline typename Traits::MatrixL matrixL() const { + eigen_assert(m_isInitialized && "LLT is not initialized."); + return Traits::getL(m_matrix); + } + +#ifdef EIGEN_PARSED_BY_DOXYGEN + /** \returns the solution x of \f$ A x = b \f$ using the current decomposition of A. + * + * Since this LLT class assumes anyway that the matrix A is invertible, the solution + * theoretically exists and is unique regardless of b. + * + * Example: \include LLT_solve.cpp + * Output: \verbinclude LLT_solve.out + * + * \sa solveInPlace(), MatrixBase::llt(), SelfAdjointView::llt() + */ + template + inline const Solve solve(const MatrixBase& b) const; +#endif + + template + void solveInPlace(const MatrixBase& bAndX) const; + + template + LLT& compute(const EigenBase& matrix); + + /** \returns an estimate of the reciprocal condition number of the matrix of + * which \c *this is the Cholesky decomposition. + */ + RealScalar rcond() const { + eigen_assert(m_isInitialized && "LLT is not initialized."); + eigen_assert(m_info == Success && "LLT failed because matrix appears to be negative"); + return internal::rcond_estimate_helper(m_l1_norm, *this); + } + + /** \returns the LLT decomposition matrix + * + * TODO: document the storage layout + */ + inline const MatrixType& matrixLLT() const { + eigen_assert(m_isInitialized && "LLT is not initialized."); + return m_matrix; + } + + MatrixType reconstructedMatrix() const; + + /** \brief Reports whether previous computation was successful. 
+ * + * \returns \c Success if computation was successful, + * \c NumericalIssue if the matrix appears not to be positive definite. + */ + ComputationInfo info() const { + eigen_assert(m_isInitialized && "LLT is not initialized."); + return m_info; + } + + /** \returns the adjoint of \c *this, that is, a const reference to the decomposition itself as the underlying matrix + * is self-adjoint. + * + * This method is provided for compatibility with other matrix decompositions, thus enabling generic code such as: + * \code x = decomposition.adjoint().solve(b) \endcode + */ + const LLT& adjoint() const noexcept { return *this; } + + constexpr Index rows() const noexcept { return m_matrix.rows(); } + constexpr Index cols() const noexcept { return m_matrix.cols(); } + + template + LLT& rankUpdate(const VectorType& vec, const RealScalar& sigma = 1); + +#ifndef EIGEN_PARSED_BY_DOXYGEN + template + void _solve_impl(const RhsType& rhs, DstType& dst) const; + + template + void _solve_impl_transposed(const RhsType& rhs, DstType& dst) const; +#endif + + protected: + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + + /** \internal + * Used to compute and store L + * The strict upper part is not used and even not initialized. 
+ */ + MatrixType m_matrix; + RealScalar m_l1_norm; + bool m_isInitialized; + ComputationInfo m_info; +}; + +namespace internal { + +template +struct llt_inplace; + +template +static Index llt_rank_update_lower(MatrixType& mat, const VectorType& vec, + const typename MatrixType::RealScalar& sigma) { + using std::sqrt; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::ColXpr ColXpr; + typedef internal::remove_all_t ColXprCleaned; + typedef typename ColXprCleaned::SegmentReturnType ColXprSegment; + typedef Matrix TempVectorType; + typedef typename TempVectorType::SegmentReturnType TempVecSegment; + + Index n = mat.cols(); + eigen_assert(mat.rows() == n && vec.size() == n); + + TempVectorType temp; + + if (sigma > 0) { + // This version is based on Givens rotations. + // It is faster than the other one below, but only works for updates, + // i.e., for sigma > 0 + temp = sqrt(sigma) * vec; + + for (Index i = 0; i < n; ++i) { + JacobiRotation g; + g.makeGivens(mat(i, i), -temp(i), &mat(i, i)); + + Index rs = n - i - 1; + if (rs > 0) { + ColXprSegment x(mat.col(i).tail(rs)); + TempVecSegment y(temp.tail(rs)); + apply_rotation_in_the_plane(x, y, g); + } + } + } else { + temp = vec; + RealScalar beta = 1; + for (Index j = 0; j < n; ++j) { + RealScalar Ljj = numext::real(mat.coeff(j, j)); + RealScalar dj = numext::abs2(Ljj); + Scalar wj = temp.coeff(j); + RealScalar swj2 = sigma * numext::abs2(wj); + RealScalar gamma = dj * beta + swj2; + + RealScalar x = dj + swj2 / beta; + if (x <= RealScalar(0)) return j; + RealScalar nLjj = sqrt(x); + mat.coeffRef(j, j) = nLjj; + beta += swj2 / dj; + + // Update the terms of L + Index rs = n - j - 1; + if (rs) { + temp.tail(rs) -= (wj / Ljj) * mat.col(j).tail(rs); + if (!numext::is_exactly_zero(gamma)) + mat.col(j).tail(rs) = + (nLjj / Ljj) * mat.col(j).tail(rs) + (nLjj * sigma * numext::conj(wj) / gamma) * temp.tail(rs); + } + } + } + return -1; +} + 
+template +struct llt_inplace { + typedef typename NumTraits::Real RealScalar; + template + static Index unblocked(MatrixType& mat) { + using std::sqrt; + + eigen_assert(mat.rows() == mat.cols()); + const Index size = mat.rows(); + for (Index k = 0; k < size; ++k) { + Index rs = size - k - 1; // remaining size + + Block A21(mat, k + 1, k, rs, 1); + Block A10(mat, k, 0, 1, k); + Block A20(mat, k + 1, 0, rs, k); + + RealScalar x = numext::real(mat.coeff(k, k)); + if (k > 0) x -= A10.squaredNorm(); + if (x <= RealScalar(0)) return k; + mat.coeffRef(k, k) = x = sqrt(x); + if (k > 0 && rs > 0) A21.noalias() -= A20 * A10.adjoint(); + if (rs > 0) A21 /= x; + } + return -1; + } + + template + static Index blocked(MatrixType& m) { + eigen_assert(m.rows() == m.cols()); + Index size = m.rows(); + if (size < 32) return unblocked(m); + + Index blockSize = size / 8; + blockSize = (blockSize / 16) * 16; + blockSize = (std::min)((std::max)(blockSize, Index(8)), Index(128)); + + for (Index k = 0; k < size; k += blockSize) { + // partition the matrix: + // A00 | - | - + // lu = A10 | A11 | - + // A20 | A21 | A22 + Index bs = (std::min)(blockSize, size - k); + Index rs = size - k - bs; + Block A11(m, k, k, bs, bs); + Block A21(m, k + bs, k, rs, bs); + Block A22(m, k + bs, k + bs, rs, rs); + + Index ret; + if ((ret = unblocked(A11)) >= 0) return k + ret; + if (rs > 0) A11.adjoint().template triangularView().template solveInPlace(A21); + if (rs > 0) + A22.template selfadjointView().rankUpdate(A21, + typename NumTraits::Literal(-1)); // bottleneck + } + return -1; + } + + template + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) { + return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); + } +}; + +template +struct llt_inplace { + typedef typename NumTraits::Real RealScalar; + + template + static EIGEN_STRONG_INLINE Index unblocked(MatrixType& mat) { + Transpose matt(mat); + return llt_inplace::unblocked(matt); + } + template + static 
EIGEN_STRONG_INLINE Index blocked(MatrixType& mat) { + Transpose matt(mat); + return llt_inplace::blocked(matt); + } + template + static Index rankUpdate(MatrixType& mat, const VectorType& vec, const RealScalar& sigma) { + Transpose matt(mat); + return llt_inplace::rankUpdate(matt, vec.conjugate(), sigma); + } +}; + +template +struct LLT_Traits { + typedef const TriangularView MatrixL; + typedef const TriangularView MatrixU; + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m.adjoint()); } + static bool inplace_decomposition(MatrixType& m) { + return llt_inplace::blocked(m) == -1; + } +}; + +template +struct LLT_Traits { + typedef const TriangularView MatrixL; + typedef const TriangularView MatrixU; + static inline MatrixL getL(const MatrixType& m) { return MatrixL(m.adjoint()); } + static inline MatrixU getU(const MatrixType& m) { return MatrixU(m); } + static bool inplace_decomposition(MatrixType& m) { + return llt_inplace::blocked(m) == -1; + } +}; + +} // end namespace internal + +/** Computes / recomputes the Cholesky decomposition A = LL^* = U^*U of \a matrix + * + * \returns a reference to *this + * + * Example: \include TutorialLinAlgComputeTwice.cpp + * Output: \verbinclude TutorialLinAlgComputeTwice.out + */ +template +template +LLT& LLT::compute(const EigenBase& a) { + eigen_assert(a.rows() == a.cols()); + const Index size = a.rows(); + m_matrix.resize(size, size); + if (!internal::is_same_dense(m_matrix, a.derived())) m_matrix = a.derived(); + + // Compute matrix L1 norm = max abs column sum. 
+ m_l1_norm = RealScalar(0); + // TODO move this code to SelfAdjointView + for (Index col = 0; col < size; ++col) { + RealScalar abs_col_sum; + if (UpLo_ == Lower) + abs_col_sum = + m_matrix.col(col).tail(size - col).template lpNorm<1>() + m_matrix.row(col).head(col).template lpNorm<1>(); + else + abs_col_sum = + m_matrix.col(col).head(col).template lpNorm<1>() + m_matrix.row(col).tail(size - col).template lpNorm<1>(); + if (abs_col_sum > m_l1_norm) m_l1_norm = abs_col_sum; + } + + m_isInitialized = true; + bool ok = Traits::inplace_decomposition(m_matrix); + m_info = ok ? Success : NumericalIssue; + + return *this; +} + +/** Performs a rank one update (or downdate) of the current decomposition. + * If A = LL^* before the rank one update, + * then after it we have LL^* = A + sigma * v v^* where \a v must be a vector + * of same dimension. + */ +template +template +LLT& LLT::rankUpdate(const VectorType& v, const RealScalar& sigma) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorType); + eigen_assert(v.size() == m_matrix.cols()); + eigen_assert(m_isInitialized); + if (internal::llt_inplace::rankUpdate(m_matrix, v, sigma) >= 0) + m_info = NumericalIssue; + else + m_info = Success; + + return *this; +} + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +template +void LLT::_solve_impl(const RhsType& rhs, DstType& dst) const { + _solve_impl_transposed(rhs, dst); +} + +template +template +void LLT::_solve_impl_transposed(const RhsType& rhs, DstType& dst) const { + dst = rhs; + + matrixL().template conjugateIf().solveInPlace(dst); + matrixU().template conjugateIf().solveInPlace(dst); +} +#endif + +/** \internal use x = llt_object.solve(x); + * + * This is the \em in-place version of solve(). + * + * \param bAndX represents both the right-hand side matrix b and result x. + * + * This version avoids a copy when the right hand side matrix b is not needed anymore. + * + * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here. 
+ * This function will const_cast it, so constness isn't honored here. + * + * \sa LLT::solve(), MatrixBase::llt() + */ +template +template +void LLT::solveInPlace(const MatrixBase& bAndX) const { + eigen_assert(m_isInitialized && "LLT is not initialized."); + eigen_assert(m_matrix.rows() == bAndX.rows()); + matrixL().solveInPlace(bAndX); + matrixU().solveInPlace(bAndX); +} + +/** \returns the matrix represented by the decomposition, + * i.e., it returns the product: L L^*. + * This function is provided for debug purpose. */ +template +MatrixType LLT::reconstructedMatrix() const { + eigen_assert(m_isInitialized && "LLT is not initialized."); + return matrixL() * matrixL().adjoint().toDenseMatrix(); +} + +/** \cholesky_module + * \returns the LLT decomposition of \c *this + * \sa SelfAdjointView::llt() + */ +template +inline const LLT::PlainObject> MatrixBase::llt() const { + return LLT(derived()); +} + +/** \cholesky_module + * \returns the LLT decomposition of \c *this + * \sa SelfAdjointView::llt() + */ +template +inline const LLT::PlainObject, UpLo> SelfAdjointView::llt() + const { + return LLT(m_matrix); +} + +} // end namespace Eigen + +#endif // EIGEN_LLT_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h b/o-voxel/third_party/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h new file mode 100644 index 0000000000000000000000000000000000000000..5f68f8f13c062a875337ddd057a4dbad6e54f25f --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Cholesky/LLT_LAPACKE.h @@ -0,0 +1,124 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + ******************************************************************************** + * Content : Eigen bindings to LAPACKe + * LLt decomposition based on LAPACKE_?potrf function. 
+ ******************************************************************************** +*/ + +#ifndef EIGEN_LLT_LAPACKE_H +#define EIGEN_LLT_LAPACKE_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +namespace lapacke_helpers { +// ------------------------------------------------------------------------------------------------------------------- +// Dispatch for rank update handling upper and lower parts +// ------------------------------------------------------------------------------------------------------------------- + +template +struct rank_update {}; + +template <> +struct rank_update { + template + static Index run(MatrixType &mat, const VectorType &vec, const typename MatrixType::RealScalar &sigma) { + return Eigen::internal::llt_rank_update_lower(mat, vec, sigma); + } +}; + +template <> +struct rank_update { + template + static Index run(MatrixType &mat, const VectorType &vec, const typename MatrixType::RealScalar &sigma) { + Transpose matt(mat); + return Eigen::internal::llt_rank_update_lower(matt, vec.conjugate(), sigma); + } +}; + +// ------------------------------------------------------------------------------------------------------------------- +// Generic lapacke llt implementation that hands off to the dispatches +// ------------------------------------------------------------------------------------------------------------------- + +template +struct lapacke_llt { + EIGEN_STATIC_ASSERT(((Mode == Lower) || (Mode == Upper)), MODE_MUST_BE_UPPER_OR_LOWER) + template + static Index blocked(MatrixType &m) { + eigen_assert(m.rows() == m.cols()); + if (m.rows() == 0) { + return -1; + } + /* Set up parameters for ?potrf */ + lapack_int size = to_lapack(m.rows()); + lapack_int matrix_order = lapack_storage_of(m); + constexpr char uplo = Mode == Upper ? 
'U' : 'L'; + Scalar *a = &(m.coeffRef(0, 0)); + lapack_int lda = to_lapack(m.outerStride()); + + lapack_int info = potrf(matrix_order, uplo, size, to_lapack(a), lda); + info = (info == 0) ? -1 : info > 0 ? info - 1 : size; + return info; + } + + template + static Index rankUpdate(MatrixType &mat, const VectorType &vec, const typename MatrixType::RealScalar &sigma) { + return rank_update::run(mat, vec, sigma); + } +}; +} // namespace lapacke_helpers +// end namespace lapacke_helpers + +/* + * Here, we just put the generic implementation from lapacke_llt into a full specialization of the llt_inplace + * type. By being a full specialization, the versions defined here thus get precedence over the generic implementation + * in LLT.h for double, float and complex double, complex float types. + */ + +#define EIGEN_LAPACKE_LLT(EIGTYPE) \ + template <> \ + struct llt_inplace : public lapacke_helpers::lapacke_llt {}; \ + template <> \ + struct llt_inplace : public lapacke_helpers::lapacke_llt {}; + +EIGEN_LAPACKE_LLT(double) +EIGEN_LAPACKE_LLT(float) +EIGEN_LAPACKE_LLT(std::complex) +EIGEN_LAPACKE_LLT(std::complex) + +#undef EIGEN_LAPACKE_LLT + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_LLT_LAPACKE_H diff --git a/o-voxel/third_party/eigen/Eigen/src/CholmodSupport/CholmodSupport.h b/o-voxel/third_party/eigen/Eigen/src/CholmodSupport/CholmodSupport.h new file mode 100644 index 0000000000000000000000000000000000000000..d41831edbd89885f75ad43c3b1311c7eda0bc72b --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/CholmodSupport/CholmodSupport.h @@ -0,0 +1,738 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CHOLMODSUPPORT_H +#define EIGEN_CHOLMODSUPPORT_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template +struct cholmod_configure_matrix; + +template <> +struct cholmod_configure_matrix { + template + static void run(CholmodType& mat) { + mat.xtype = CHOLMOD_REAL; + mat.dtype = CHOLMOD_DOUBLE; + } +}; + +template <> +struct cholmod_configure_matrix > { + template + static void run(CholmodType& mat) { + mat.xtype = CHOLMOD_COMPLEX; + mat.dtype = CHOLMOD_DOUBLE; + } +}; + +// Other scalar types are not yet supported by Cholmod +// template<> struct cholmod_configure_matrix { +// template +// static void run(CholmodType& mat) { +// mat.xtype = CHOLMOD_REAL; +// mat.dtype = CHOLMOD_SINGLE; +// } +// }; +// +// template<> struct cholmod_configure_matrix > { +// template +// static void run(CholmodType& mat) { +// mat.xtype = CHOLMOD_COMPLEX; +// mat.dtype = CHOLMOD_SINGLE; +// } +// }; + +} // namespace internal + +/** Wraps the Eigen sparse matrix \a mat into a Cholmod sparse matrix object. + * Note that the data are shared. 
+ */ +template +cholmod_sparse viewAsCholmod(Ref > mat) { + cholmod_sparse res; + res.nzmax = mat.nonZeros(); + res.nrow = mat.rows(); + res.ncol = mat.cols(); + res.p = mat.outerIndexPtr(); + res.i = mat.innerIndexPtr(); + res.x = mat.valuePtr(); + res.z = 0; + res.sorted = 1; + if (mat.isCompressed()) { + res.packed = 1; + res.nz = 0; + } else { + res.packed = 0; + res.nz = mat.innerNonZeroPtr(); + } + + res.dtype = 0; + res.stype = -1; + + if (internal::is_same::value) { + res.itype = CHOLMOD_INT; + } else if (internal::is_same::value) { + res.itype = CHOLMOD_LONG; + } else { + eigen_assert(false && "Index type not supported yet"); + } + + // setup res.xtype + internal::cholmod_configure_matrix::run(res); + + res.stype = 0; + + return res; +} + +template +const cholmod_sparse viewAsCholmod(const SparseMatrix& mat) { + cholmod_sparse res = viewAsCholmod(Ref >(mat.const_cast_derived())); + return res; +} + +template +const cholmod_sparse viewAsCholmod(const SparseVector& mat) { + cholmod_sparse res = viewAsCholmod(Ref >(mat.const_cast_derived())); + return res; +} + +/** Returns a view of the Eigen sparse matrix \a mat as Cholmod sparse matrix. + * The data are not copied but shared. */ +template +cholmod_sparse viewAsCholmod(const SparseSelfAdjointView, UpLo>& mat) { + cholmod_sparse res = viewAsCholmod(Ref >(mat.matrix().const_cast_derived())); + + if (UpLo == Upper) res.stype = 1; + if (UpLo == Lower) res.stype = -1; + // swap stype for rowmajor matrices (only works for real matrices) + EIGEN_STATIC_ASSERT((Options_ & RowMajorBit) == 0 || NumTraits::IsComplex == 0, + THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + if (Options_ & RowMajorBit) res.stype *= -1; + + return res; +} + +/** Returns a view of the Eigen \b dense matrix \a mat as Cholmod dense matrix. + * The data are not copied but shared. 
*/ +template +cholmod_dense viewAsCholmod(MatrixBase& mat) { + EIGEN_STATIC_ASSERT((internal::traits::Flags & RowMajorBit) == 0, + THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + typedef typename Derived::Scalar Scalar; + + cholmod_dense res; + res.nrow = mat.rows(); + res.ncol = mat.cols(); + res.nzmax = res.nrow * res.ncol; + res.d = Derived::IsVectorAtCompileTime ? mat.derived().size() : mat.derived().outerStride(); + res.x = (void*)(mat.derived().data()); + res.z = 0; + + internal::cholmod_configure_matrix::run(res); + + return res; +} + +/** Returns a view of the Cholmod sparse matrix \a cm as an Eigen sparse matrix. + * The data are not copied but shared. */ +template +Map > viewAsEigen(cholmod_sparse& cm) { + return Map >( + cm.nrow, cm.ncol, static_cast(cm.p)[cm.ncol], static_cast(cm.p), + static_cast(cm.i), static_cast(cm.x)); +} + +/** Returns a view of the Cholmod sparse matrix factor \a cm as an Eigen sparse matrix. + * The data are not copied but shared. */ +template +Map > viewAsEigen(cholmod_factor& cm) { + return Map >( + cm.n, cm.n, static_cast(cm.p)[cm.n], static_cast(cm.p), + static_cast(cm.i), static_cast(cm.x)); +} + +namespace internal { + +// template specializations for int and long that call the correct cholmod method + +#define EIGEN_CHOLMOD_SPECIALIZE0(ret, name) \ + template \ + inline ret cm_##name(cholmod_common& Common) { \ + return cholmod_##name(&Common); \ + } \ + template <> \ + inline ret cm_##name(cholmod_common & Common) { \ + return cholmod_l_##name(&Common); \ + } + +#define EIGEN_CHOLMOD_SPECIALIZE1(ret, name, t1, a1) \ + template \ + inline ret cm_##name(t1& a1, cholmod_common& Common) { \ + return cholmod_##name(&a1, &Common); \ + } \ + template <> \ + inline ret cm_##name(t1 & a1, cholmod_common & Common) { \ + return cholmod_l_##name(&a1, &Common); \ + } + +EIGEN_CHOLMOD_SPECIALIZE0(int, start) +EIGEN_CHOLMOD_SPECIALIZE0(int, finish) + +EIGEN_CHOLMOD_SPECIALIZE1(int, free_factor, cholmod_factor*, L) 
+EIGEN_CHOLMOD_SPECIALIZE1(int, free_dense, cholmod_dense*, X) +EIGEN_CHOLMOD_SPECIALIZE1(int, free_sparse, cholmod_sparse*, A) + +EIGEN_CHOLMOD_SPECIALIZE1(cholmod_factor*, analyze, cholmod_sparse, A) +EIGEN_CHOLMOD_SPECIALIZE1(cholmod_sparse*, factor_to_sparse, cholmod_factor, L) + +template +inline cholmod_dense* cm_solve(int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common& Common) { + return cholmod_solve(sys, &L, &B, &Common); +} +template <> +inline cholmod_dense* cm_solve(int sys, cholmod_factor& L, cholmod_dense& B, cholmod_common& Common) { + return cholmod_l_solve(sys, &L, &B, &Common); +} + +template +inline cholmod_sparse* cm_spsolve(int sys, cholmod_factor& L, cholmod_sparse& B, cholmod_common& Common) { + return cholmod_spsolve(sys, &L, &B, &Common); +} +template <> +inline cholmod_sparse* cm_spsolve(int sys, cholmod_factor& L, cholmod_sparse& B, + cholmod_common& Common) { + return cholmod_l_spsolve(sys, &L, &B, &Common); +} + +template +inline int cm_factorize_p(cholmod_sparse* A, double beta[2], StorageIndex_* fset, std::size_t fsize, cholmod_factor* L, + cholmod_common& Common) { + return cholmod_factorize_p(A, beta, fset, fsize, L, &Common); +} +template <> +inline int cm_factorize_p(cholmod_sparse* A, double beta[2], SuiteSparse_long* fset, + std::size_t fsize, cholmod_factor* L, cholmod_common& Common) { + return cholmod_l_factorize_p(A, beta, fset, fsize, L, &Common); +} + +#undef EIGEN_CHOLMOD_SPECIALIZE0 +#undef EIGEN_CHOLMOD_SPECIALIZE1 + +} // namespace internal + +enum CholmodMode { CholmodAuto, CholmodSimplicialLLt, CholmodSupernodalLLt, CholmodLDLt }; + +/** \ingroup CholmodSupport_Module + * \class CholmodBase + * \brief The base class for the direct Cholesky factorization of Cholmod + * \sa class CholmodSupernodalLLT, class CholmodSimplicialLDLT, class CholmodSimplicialLLT + */ +template +class CholmodBase : public SparseSolverBase { + protected: + typedef SparseSolverBase Base; + using Base::derived; + using 
Base::m_isInitialized; + + public: + typedef MatrixType_ MatrixType; + enum { UpLo = UpLo_ }; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef MatrixType CholMatrixType; + typedef typename MatrixType::StorageIndex StorageIndex; + enum { ColsAtCompileTime = MatrixType::ColsAtCompileTime, MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime }; + + public: + CholmodBase() : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false) { + EIGEN_STATIC_ASSERT((internal::is_same::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY); + m_shiftOffset[0] = m_shiftOffset[1] = 0.0; + internal::cm_start(m_cholmod); + } + + explicit CholmodBase(const MatrixType& matrix) + : m_cholmodFactor(0), m_info(Success), m_factorizationIsOk(false), m_analysisIsOk(false) { + EIGEN_STATIC_ASSERT((internal::is_same::value), CHOLMOD_SUPPORTS_DOUBLE_PRECISION_ONLY); + m_shiftOffset[0] = m_shiftOffset[1] = 0.0; + internal::cm_start(m_cholmod); + compute(matrix); + } + + ~CholmodBase() { + if (m_cholmodFactor) internal::cm_free_factor(m_cholmodFactor, m_cholmod); + internal::cm_finish(m_cholmod); + } + + inline StorageIndex cols() const { return internal::convert_index(m_cholmodFactor->n); } + inline StorageIndex rows() const { return internal::convert_index(m_cholmodFactor->n); } + + /** \brief Reports whether previous computation was successful. + * + * \returns \c Success if computation was successful, + * \c NumericalIssue if the matrix appears to be negative. + */ + ComputationInfo info() const { + eigen_assert(m_isInitialized && "Decomposition is not initialized."); + return m_info; + } + + /** Computes the sparse Cholesky decomposition of \a matrix */ + Derived& compute(const MatrixType& matrix) { + analyzePattern(matrix); + factorize(matrix); + return derived(); + } + + /** Performs a symbolic decomposition on the sparsity pattern of \a matrix. 
+ * + * This function is particularly useful when solving for several problems having the same structure. + * + * \sa factorize() + */ + void analyzePattern(const MatrixType& matrix) { + if (m_cholmodFactor) { + internal::cm_free_factor(m_cholmodFactor, m_cholmod); + m_cholmodFactor = 0; + } + cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView()); + m_cholmodFactor = internal::cm_analyze(A, m_cholmod); + + this->m_isInitialized = true; + this->m_info = Success; + m_analysisIsOk = true; + m_factorizationIsOk = false; + } + + /** Performs a numeric decomposition of \a matrix + * + * The given matrix must have the same sparsity pattern as the matrix on which the symbolic decomposition has been + * performed. + * + * \sa analyzePattern() + */ + void factorize(const MatrixType& matrix) { + eigen_assert(m_analysisIsOk && "You must first call analyzePattern()"); + cholmod_sparse A = viewAsCholmod(matrix.template selfadjointView()); + internal::cm_factorize_p(&A, m_shiftOffset, 0, 0, m_cholmodFactor, m_cholmod); + + // If the factorization failed, either the input matrix was zero (so m_cholmodFactor == nullptr), or minor is the + // column at which it failed. On success minor == n. + this->m_info = + (m_cholmodFactor != nullptr && m_cholmodFactor->minor == m_cholmodFactor->n ? Success : NumericalIssue); + m_factorizationIsOk = true; + } + + /** Returns a reference to the Cholmod's configuration structure to get a full control over the performed operations. + * See the Cholmod user guide for details. 
*/ + cholmod_common& cholmod() { return m_cholmod; } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal */ + template + void _solve_impl(const MatrixBase& b, MatrixBase& dest) const { + eigen_assert(m_factorizationIsOk && + "The decomposition is not in a valid state for solving, you must first call either compute() or " + "symbolic()/numeric()"); + const Index size = m_cholmodFactor->n; + EIGEN_UNUSED_VARIABLE(size); + eigen_assert(size == b.rows()); + + // Cholmod needs column-major storage without inner-stride, which corresponds to the default behavior of Ref. + Ref > b_ref(b.derived()); + + cholmod_dense b_cd = viewAsCholmod(b_ref); + cholmod_dense* x_cd = internal::cm_solve(CHOLMOD_A, *m_cholmodFactor, b_cd, m_cholmod); + if (!x_cd) { + this->m_info = NumericalIssue; + return; + } + // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) + // NOTE Actually, the copy can be avoided by calling cholmod_solve2 instead of cholmod_solve + dest = Matrix::Map(reinterpret_cast(x_cd->x), + b.rows(), b.cols()); + internal::cm_free_dense(x_cd, m_cholmod); + } + + /** \internal */ + template + void _solve_impl(const SparseMatrixBase& b, SparseMatrixBase& dest) const { + eigen_assert(m_factorizationIsOk && + "The decomposition is not in a valid state for solving, you must first call either compute() or " + "symbolic()/numeric()"); + const Index size = m_cholmodFactor->n; + EIGEN_UNUSED_VARIABLE(size); + eigen_assert(size == b.rows()); + + // note: cs stands for Cholmod Sparse + Ref > b_ref( + b.const_cast_derived()); + cholmod_sparse b_cs = viewAsCholmod(b_ref); + cholmod_sparse* x_cs = internal::cm_spsolve(CHOLMOD_A, *m_cholmodFactor, b_cs, m_cholmod); + if (!x_cs) { + this->m_info = NumericalIssue; + return; + } + // TODO optimize this copy by swapping when possible (be careful with alignment, etc.) 
+ // NOTE cholmod_spsolve in fact just calls the dense solver for blocks of 4 columns at a time (similar to Eigen's + // sparse solver) + dest.derived() = viewAsEigen(*x_cs); + internal::cm_free_sparse(x_cs, m_cholmod); + } +#endif // EIGEN_PARSED_BY_DOXYGEN + + /** Sets the shift parameter that will be used to adjust the diagonal coefficients during the numerical factorization. + * + * During the numerical factorization, an offset term is added to the diagonal coefficients:\n + * \c d_ii = \a offset + \c d_ii + * + * The default is \a offset=0. + * + * \returns a reference to \c *this. + */ + Derived& setShift(const RealScalar& offset) { + m_shiftOffset[0] = double(offset); + return derived(); + } + + /** \returns the determinant of the underlying matrix from the current factorization */ + Scalar determinant() const { + using std::exp; + return exp(logDeterminant()); + } + + /** \returns the log determinant of the underlying matrix from the current factorization */ + Scalar logDeterminant() const { + using numext::real; + using std::log; + eigen_assert(m_factorizationIsOk && + "The decomposition is not in a valid state for solving, you must first call either compute() or " + "symbolic()/numeric()"); + + RealScalar logDet = 0; + Scalar* x = static_cast(m_cholmodFactor->x); + if (m_cholmodFactor->is_super) { + // Supernodal factorization stored as a packed list of dense column-major blocks, + // as described by the following structure: + + // super[k] == index of the first column of the k-th super node + StorageIndex* super = static_cast(m_cholmodFactor->super); + // pi[k] == offset to the description of row indices of the k-th super node + StorageIndex* pi = static_cast(m_cholmodFactor->pi); + // px[k] == offset to the dense block of the k-th super node + StorageIndex* px = static_cast(m_cholmodFactor->px); + + Index nb_super_nodes = m_cholmodFactor->nsuper; + for (Index k = 0; k < nb_super_nodes; ++k) { + StorageIndex ncols = super[k + 1] - super[k]; + StorageIndex nrows = pi[k + 1] - pi[k]; + + 
Map, 0, InnerStride<> > sk(x + px[k], ncols, InnerStride<>(nrows + 1)); + logDet += sk.real().log().sum(); + } + } else { + // Simplicial factorization stored as standard CSC matrix. + StorageIndex* p = static_cast(m_cholmodFactor->p); + Index size = m_cholmodFactor->n; + for (Index k = 0; k < size; ++k) logDet += log(real(x[p[k]])); + } + if (m_cholmodFactor->is_ll) logDet *= 2.0; + return logDet; + } + + template + void dumpMemory(Stream& /*s*/) {} + + protected: + mutable cholmod_common m_cholmod; + cholmod_factor* m_cholmodFactor; + double m_shiftOffset[2]; + mutable ComputationInfo m_info; + int m_factorizationIsOk; + int m_analysisIsOk; +}; + +/** \ingroup CholmodSupport_Module + * \class CholmodSimplicialLLT + * \brief A simplicial direct Cholesky (LLT) factorization and solver based on Cholmod + * + * This class allows to solve for A.X = B sparse linear problems via a simplicial LL^T Cholesky factorization + * using the Cholmod library. + * This simplicial variant is equivalent to Eigen's built-in SimplicialLLT class. Therefore, it has little practical + * interest. The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices X and B can be + * either dense or sparse. + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * + * \implsparsesolverconcept + * + * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non + * compressed. + * + * \warning Only double precision real and complex scalar types are supported by Cholmod. 
+ * + * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLLT + */ +template +class CholmodSimplicialLLT : public CholmodBase > { + typedef CholmodBase Base; + using Base::m_cholmod; + + public: + typedef MatrixType_ MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::StorageIndex StorageIndex; + typedef TriangularView MatrixL; + typedef TriangularView MatrixU; + + CholmodSimplicialLLT() : Base() { init(); } + + CholmodSimplicialLLT(const MatrixType& matrix) : Base() { + init(); + this->compute(matrix); + } + + ~CholmodSimplicialLLT() {} + + /** \returns an expression of the factor L */ + inline MatrixL matrixL() const { return viewAsEigen(*Base::m_cholmodFactor); } + + /** \returns an expression of the factor U (= L^*) */ + inline MatrixU matrixU() const { return matrixL().adjoint(); } + + protected: + void init() { + m_cholmod.final_asis = 0; + m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; + m_cholmod.final_ll = 1; + } +}; + +/** \ingroup CholmodSupport_Module + * \class CholmodSimplicialLDLT + * \brief A simplicial direct Cholesky (LDLT) factorization and solver based on Cholmod + * + * This class allows to solve for A.X = B sparse linear problems via a simplicial LDL^T Cholesky factorization + * using the Cholmod library. + * This simplicial variant is equivalent to Eigen's built-in SimplicialLDLT class. Therefore, it has little practical + * interest. The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices X and B can be + * either dense or sparse. + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. 
+ * + * \implsparsesolverconcept + * + * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non + * compressed. + * + * \warning Only double precision real and complex scalar types are supported by Cholmod. + * + * \sa \ref TutorialSparseSolverConcept, class CholmodSupernodalLLT, class SimplicialLDLT + */ +template +class CholmodSimplicialLDLT : public CholmodBase > { + typedef CholmodBase Base; + using Base::m_cholmod; + + public: + typedef MatrixType_ MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::StorageIndex StorageIndex; + typedef Matrix VectorType; + typedef TriangularView MatrixL; + typedef TriangularView MatrixU; + + CholmodSimplicialLDLT() : Base() { init(); } + + CholmodSimplicialLDLT(const MatrixType& matrix) : Base() { + init(); + this->compute(matrix); + } + + ~CholmodSimplicialLDLT() {} + + /** \returns a vector expression of the diagonal D */ + inline VectorType vectorD() const { + auto cholmodL = viewAsEigen(*Base::m_cholmodFactor); + + VectorType D{cholmodL.rows()}; + + for (Index k = 0; k < cholmodL.outerSize(); ++k) { + typename decltype(cholmodL)::InnerIterator it{cholmodL, k}; + D(k) = it.value(); + } + + return D; + } + + /** \returns an expression of the factor L */ + inline MatrixL matrixL() const { return viewAsEigen(*Base::m_cholmodFactor); } + + /** \returns an expression of the factor U (= L^*) */ + inline MatrixU matrixU() const { return matrixL().adjoint(); } + + protected: + void init() { + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; + } +}; + +/** \ingroup CholmodSupport_Module + * \class CholmodSupernodalLLT + * \brief A supernodal Cholesky (LLT) factorization and solver based on Cholmod + * + * This class allows to solve for A.X = B sparse linear problems via a supernodal LL^T Cholesky factorization + * using the Cholmod library. 
+ * This supernodal variant performs best on dense enough problems, e.g., 3D FEM, or very high order 2D FEM. + * The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices + * X and B can be either dense or sparse. + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * + * \implsparsesolverconcept + * + * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non + * compressed. + * + * \warning Only double precision real and complex scalar types are supported by Cholmod. + * + * \sa \ref TutorialSparseSolverConcept + */ +template +class CholmodSupernodalLLT : public CholmodBase > { + typedef CholmodBase Base; + using Base::m_cholmod; + + public: + typedef MatrixType_ MatrixType; + typedef typename MatrixType::Scalar Scalar; + typedef typename MatrixType::RealScalar RealScalar; + typedef typename MatrixType::StorageIndex StorageIndex; + + CholmodSupernodalLLT() : Base() { init(); } + + CholmodSupernodalLLT(const MatrixType& matrix) : Base() { + init(); + this->compute(matrix); + } + + ~CholmodSupernodalLLT() {} + + /** \returns an expression of the factor L */ + inline MatrixType matrixL() const { + // Convert Cholmod factor's supernodal storage format to Eigen's CSC storage format + cholmod_sparse* cholmodL = internal::cm_factor_to_sparse(*Base::m_cholmodFactor, m_cholmod); + MatrixType L = viewAsEigen(*cholmodL); + internal::cm_free_sparse(cholmodL, m_cholmod); + + return L; + } + + /** \returns an expression of the factor U (= L^*) */ + inline MatrixType matrixU() const { return matrixL().adjoint(); } + + protected: + void init() { + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_SUPERNODAL; + } +}; + +/** \ingroup CholmodSupport_Module + * \class CholmodDecomposition + * \brief A general Cholesky factorization 
and solver based on Cholmod + * + * This class allows to solve for A.X = B sparse linear problems via a LL^T or LDL^T Cholesky factorization + * using the Cholmod library. The sparse matrix A must be selfadjoint and positive definite. The vectors or matrices + * X and B can be either dense or sparse. + * + * This variant permits to change the underlying Cholesky method at runtime. + * On the other hand, it does not provide access to the result of the factorization. + * The default is to let Cholmod automatically choose between a simplicial and supernodal factorization. + * + * \tparam MatrixType_ the type of the sparse matrix A, it must be a SparseMatrix<> + * \tparam UpLo_ the triangular part that will be used for the computations. It can be Lower + * or Upper. Default is Lower. + * + * \implsparsesolverconcept + * + * This class supports all kind of SparseMatrix<>: row or column major; upper, lower, or both; compressed or non + * compressed. + * + * \warning Only double precision real and complex scalar types are supported by Cholmod. 
+ * + * \sa \ref TutorialSparseSolverConcept + */ +template +class CholmodDecomposition : public CholmodBase > { + typedef CholmodBase Base; + using Base::m_cholmod; + + public: + typedef MatrixType_ MatrixType; + + CholmodDecomposition() : Base() { init(); } + + CholmodDecomposition(const MatrixType& matrix) : Base() { + init(); + this->compute(matrix); + } + + ~CholmodDecomposition() {} + + void setMode(CholmodMode mode) { + switch (mode) { + case CholmodAuto: + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_AUTO; + break; + case CholmodSimplicialLLt: + m_cholmod.final_asis = 0; + m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; + m_cholmod.final_ll = 1; + break; + case CholmodSupernodalLLt: + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_SUPERNODAL; + break; + case CholmodLDLt: + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_SIMPLICIAL; + break; + default: + break; + } + } + + protected: + void init() { + m_cholmod.final_asis = 1; + m_cholmod.supernodal = CHOLMOD_AUTO; + } +}; + +} // end namespace Eigen + +#endif // EIGEN_CHOLMODSUPPORT_H diff --git a/o-voxel/third_party/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h b/o-voxel/third_party/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h new file mode 100644 index 0000000000000000000000000000000000000000..7e40be515f1746876705197a220ddfd094d8beff --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/CholmodSupport/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_CHOLMODSUPPORT_MODULE_H +#error "Please include Eigen/CholmodSupport instead of including headers inside the src directory directly." 
+#endif diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/ArithmeticSequence.h b/o-voxel/third_party/eigen/Eigen/src/Core/ArithmeticSequence.h new file mode 100644 index 0000000000000000000000000000000000000000..0674f6c085c7ed6c9749a94034b8c6be100343c3 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/ArithmeticSequence.h @@ -0,0 +1,239 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2017 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ARITHMETIC_SEQUENCE_H +#define EIGEN_ARITHMETIC_SEQUENCE_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +// Helper to cleanup the type of the increment: +template +struct cleanup_seq_incr { + typedef typename cleanup_index_type::type type; +}; + +} // namespace internal + +//-------------------------------------------------------------------------------- +// seq(first,last,incr) and seqN(first,size,incr) +//-------------------------------------------------------------------------------- + +template > +class ArithmeticSequence; + +template +ArithmeticSequence::type, + typename internal::cleanup_index_type::type, + typename internal::cleanup_seq_incr::type> +seqN(FirstType first, SizeType size, IncrType incr); + +/** \class ArithmeticSequence + * \ingroup Core_Module + * + * This class represents an arithmetic progression \f$ a_0, a_1, a_2, ..., a_{n-1}\f$ defined by + * its \em first value \f$ a_0 \f$, its \em size (aka length) \em n, and the \em increment (aka stride) + * that is equal to \f$ a_{i+1}-a_{i}\f$ for any \em i. 
+ * + * It is internally used as the return type of the Eigen::seq and Eigen::seqN functions, and as the input arguments + * of DenseBase::operator()(const RowIndices&, const ColIndices&), and most of the time this is the + * only way it is used. + * + * \tparam FirstType type of the first element, usually an Index, + * but internally it can be a symbolic expression + * \tparam SizeType type representing the size of the sequence, usually an Index + * or a compile time integral constant. Internally, it can also be a symbolic expression + * \tparam IncrType type of the increment, can be a runtime Index, or a compile time integral constant (default is + * compile-time 1) + * + * \sa Eigen::seq, Eigen::seqN, DenseBase::operator()(const RowIndices&, const ColIndices&), class IndexedView + */ +template +class ArithmeticSequence { + public: + constexpr ArithmeticSequence() = default; + constexpr ArithmeticSequence(FirstType first, SizeType size) : m_first(first), m_size(size) {} + constexpr ArithmeticSequence(FirstType first, SizeType size, IncrType incr) + : m_first(first), m_size(size), m_incr(incr) {} + + enum { + // SizeAtCompileTime = internal::get_fixed_value::value, + IncrAtCompileTime = internal::get_fixed_value::value + }; + + /** \returns the size, i.e., number of elements, of the sequence */ + constexpr Index size() const { return m_size; } + + /** \returns the first element \f$ a_0 \f$ in the sequence */ + constexpr Index first() const { return m_first; } + + /** \returns the value \f$ a_i \f$ at index \a i in the sequence. 
*/ + constexpr Index operator[](Index i) const { return m_first + i * m_incr; } + + constexpr const FirstType& firstObject() const { return m_first; } + constexpr const SizeType& sizeObject() const { return m_size; } + constexpr const IncrType& incrObject() const { return m_incr; } + + protected: + FirstType m_first; + SizeType m_size; + IncrType m_incr; + + public: + constexpr auto reverse() const -> decltype(Eigen::seqN(m_first + (m_size + fix<-1>()) * m_incr, m_size, -m_incr)) { + return seqN(m_first + (m_size + fix<-1>()) * m_incr, m_size, -m_incr); + } +}; + +/** \returns an ArithmeticSequence starting at \a first, of length \a size, and increment \a incr + * + * \sa seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType) */ +template +ArithmeticSequence::type, + typename internal::cleanup_index_type::type, + typename internal::cleanup_seq_incr::type> +seqN(FirstType first, SizeType size, IncrType incr) { + return ArithmeticSequence::type, + typename internal::cleanup_index_type::type, + typename internal::cleanup_seq_incr::type>(first, size, incr); +} + +/** \returns an ArithmeticSequence starting at \a first, of length \a size, and unit increment + * + * \sa seqN(FirstType,SizeType,IncrType), seq(FirstType,LastType) */ +template +ArithmeticSequence::type, + typename internal::cleanup_index_type::type> +seqN(FirstType first, SizeType size) { + return ArithmeticSequence::type, + typename internal::cleanup_index_type::type>(first, size); +} + +#ifdef EIGEN_PARSED_BY_DOXYGEN + +/** \returns an ArithmeticSequence starting at \a f, up (or down) to \a l, and with positive (or negative) increment \a + * incr + * + * It is essentially an alias to: + * \code + * seqN(f, (l-f+incr)/incr, incr); + * \endcode + * + * \sa seqN(FirstType,SizeType,IncrType), seq(FirstType,LastType) + */ +template +auto seq(FirstType f, LastType l, IncrType incr); + +/** \returns an ArithmeticSequence starting at \a f, up (or down) to \a l, and unit increment + * + * It is essentially an 
alias to: + * \code + * seqN(f,l-f+1); + * \endcode + * + * \sa seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType) + */ +template +auto seq(FirstType f, LastType l); + +#else // EIGEN_PARSED_BY_DOXYGEN + +template +auto seq(FirstType f, LastType l) + -> decltype(seqN(typename internal::cleanup_index_type::type(f), + (typename internal::cleanup_index_type::type(l) - + typename internal::cleanup_index_type::type(f) + fix<1>()))) { + return seqN(typename internal::cleanup_index_type::type(f), + (typename internal::cleanup_index_type::type(l) - + typename internal::cleanup_index_type::type(f) + fix<1>())); +} + +template +auto seq(FirstType f, LastType l, IncrType incr) + -> decltype(seqN(typename internal::cleanup_index_type::type(f), + (typename internal::cleanup_index_type::type(l) - + typename internal::cleanup_index_type::type(f) + + typename internal::cleanup_seq_incr::type(incr)) / + typename internal::cleanup_seq_incr::type(incr), + typename internal::cleanup_seq_incr::type(incr))) { + typedef typename internal::cleanup_seq_incr::type CleanedIncrType; + return seqN(typename internal::cleanup_index_type::type(f), + (typename internal::cleanup_index_type::type(l) - + typename internal::cleanup_index_type::type(f) + CleanedIncrType(incr)) / + CleanedIncrType(incr), + CleanedIncrType(incr)); +} + +#endif // EIGEN_PARSED_BY_DOXYGEN + +namespace placeholders { + +/** \cpp11 + * \returns a symbolic ArithmeticSequence representing the last \a size elements with increment \a incr. 
+ * + * It is a shortcut for: \code seqN(last-(size-fix<1>)*incr, size, incr) \endcode + * + * \sa lastN(SizeType), seqN(FirstType,SizeType), seq(FirstType,LastType,IncrType) */ +template +auto lastN(SizeType size, IncrType incr) + -> decltype(seqN(Eigen::placeholders::last - (size - fix<1>()) * incr, size, incr)) { + return seqN(Eigen::placeholders::last - (size - fix<1>()) * incr, size, incr); +} + +/** \cpp11 + * \returns a symbolic ArithmeticSequence representing the last \a size elements with a unit increment. + * + * It is a shortcut for: \code seq(last+fix<1>-size, last) \endcode + * + * \sa lastN(SizeType,IncrType, seqN(FirstType,SizeType), seq(FirstType,LastType) */ +template +auto lastN(SizeType size) -> decltype(seqN(Eigen::placeholders::last + fix<1>() - size, size)) { + return seqN(Eigen::placeholders::last + fix<1>() - size, size); +} + +} // namespace placeholders + +/** \namespace Eigen::indexing + * \ingroup Core_Module + * + * The sole purpose of this namespace is to be able to import all functions + * and symbols that are expected to be used within operator() for indexing + * and slicing. If you already imported the whole Eigen namespace: + * \code using namespace Eigen; \endcode + * then you are already all set. 
Otherwise, if you don't want/cannot import + * the whole Eigen namespace, the following line: + * \code using namespace Eigen::indexing; \endcode + * is equivalent to: + * \code + using Eigen::fix; + using Eigen::seq; + using Eigen::seqN; + using Eigen::placeholders::all; + using Eigen::placeholders::last; + using Eigen::placeholders::lastN; // c++11 only + using Eigen::placeholders::lastp1; + \endcode + */ +namespace indexing { +using Eigen::fix; +using Eigen::seq; +using Eigen::seqN; +using Eigen::placeholders::all; +using Eigen::placeholders::last; +using Eigen::placeholders::lastN; +using Eigen::placeholders::lastp1; +} // namespace indexing + +} // end namespace Eigen + +#endif // EIGEN_ARITHMETIC_SEQUENCE_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Array.h b/o-voxel/third_party/eigen/Eigen/src/Core/Array.h new file mode 100644 index 0000000000000000000000000000000000000000..15a40758419d17ddfea67009f2992c333886f01b --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Array.h @@ -0,0 +1,376 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ARRAY_H +#define EIGEN_ARRAY_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { +template +struct traits> + : traits> { + typedef ArrayXpr XprKind; + typedef ArrayBase> XprBase; +}; +} // namespace internal + +/** \class Array + * \ingroup Core_Module + * + * \brief General-purpose arrays with easy API for coefficient-wise operations + * + * The %Array class is very similar to the Matrix class. It provides + * general-purpose one- and two-dimensional arrays. 
The difference between the + * %Array and the %Matrix class is primarily in the API: the API for the + * %Array class provides easy access to coefficient-wise operations, while the + * API for the %Matrix class provides easy access to linear-algebra + * operations. + * + * See documentation of class Matrix for detailed information on the template parameters + * storage layout. + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAY_PLUGIN. + * + * \sa \blank \ref TutorialArrayClass, \ref TopicClassHierarchy + */ +template +class Array : public PlainObjectBase> { + public: + typedef PlainObjectBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Array) + + enum { Options = Options_ }; + typedef typename Base::PlainObject PlainObject; + + protected: + template + friend struct internal::conservative_resize_like_impl; + + using Base::m_storage; + + public: + using Base::base; + using Base::coeff; + using Base::coeffRef; + + /** + * The usage of + * using Base::operator=; + * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped + * the usage of 'using'. This should be done only for operator=. + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array& operator=(const EigenBase& other) { + return Base::operator=(other); + } + + /** Set all the entries to \a value. + * \sa DenseBase::setConstant(), DenseBase::fill() + */ + /* This overload is needed because the usage of + * using Base::operator=; + * fails on MSVC. Since the code below is working with GCC and MSVC, we skipped + * the usage of 'using'. This should be done only for operator=. + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array& operator=(const Scalar& value) { + Base::setConstant(value); + return *this; + } + + /** Copies the value of the expression \a other into \c *this with automatic resizing. + * + * *this might be resized to match the dimensions of \a other. 
If *this was a null matrix (not already initialized), + * it will be initialized. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array& operator=(const DenseBase& other) { + return Base::_set(other); + } + + /** + * \brief Assigns arrays to each other. + * + * \note This is a special case of the templated operator=. Its purpose is + * to prevent a default operator= from hiding the templated operator=. + * + * \callgraph + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array& operator=(const Array& other) { return Base::_set(other); } + + /** Default constructor. + * + * For fixed-size matrices, does nothing. + * + * For dynamic-size matrices, creates an empty matrix of size 0. Does not allocate any array. Such a matrix + * is called a null matrix. This constructor is the unique way to create null matrices: resizing + * a matrix to 0 is not supported. + * + * \sa resize(Index,Index) + */ +#ifdef EIGEN_INITIALIZE_COEFFS + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array() : Base() { EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } +#else + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array() = default; +#endif + /** \brief Move constructor */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array(Array&&) = default; + EIGEN_DEVICE_FUNC Array& operator=(Array&& other) noexcept(std::is_nothrow_move_assignable::value) { + Base::operator=(std::move(other)); + return *this; + } + + /** \brief Construct a row of column vector with fixed size from an arbitrary number of coefficients. + * + * \only_for_vectors + * + * This constructor is for 1D array or vectors with more than 4 coefficients. + * + * \warning To construct a column (resp. 
row) vector of fixed length, the number of values passed to this + * constructor must match the the fixed number of rows (resp. columns) of \c *this. + * + * + * Example: \include Array_variadic_ctor_cxx11.cpp + * Output: \verbinclude Array_variadic_ctor_cxx11.out + * + * \sa Array(const std::initializer_list>&) + * \sa Array(const Scalar&), Array(const Scalar&,const Scalar&) + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, + const ArgTypes&... args) + : Base(a0, a1, a2, a3, args...) {} + + /** \brief Constructs an array and initializes it from the coefficients given as initializer-lists grouped by row. + * \cpp11 + * + * In the general case, the constructor takes a list of rows, each row being represented as a list of coefficients: + * + * Example: \include Array_initializer_list_23_cxx11.cpp + * Output: \verbinclude Array_initializer_list_23_cxx11.out + * + * Each of the inner initializer lists must contain the exact same number of elements, otherwise an assertion is + * triggered. + * + * In the case of a compile-time column 1D array, implicit transposition from a single row is allowed. + * Therefore Array{{1,2,3,4,5}} is legal and the more verbose syntax + * Array{{1},{2},{3},{4},{5}} can be avoided: + * + * Example: \include Array_initializer_list_vector_cxx11.cpp + * Output: \verbinclude Array_initializer_list_vector_cxx11.out + * + * In the case of fixed-sized arrays, the initializer list sizes must exactly match the array sizes, + * and implicit transposition is allowed for compile-time 1D arrays only. + * + * \sa Array(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... 
args) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array( + const std::initializer_list>& list) + : Base(list) {} + +#ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Array(const T& x) { + Base::template _init1(x); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const T0& val0, const T1& val1) { + this->template _init2(val0, val1); + } + +#else + /** \brief Constructs a fixed-sized array initialized with coefficients starting at \a data */ + EIGEN_DEVICE_FUNC explicit Array(const Scalar* data); + /** Constructs a vector or row-vector with given dimension. \only_for_vectors + * + * Note that this is only useful for dynamic-size vectors. For fixed-size vectors, + * it is redundant to pass the dimension here, so it makes more sense to use the default + * constructor Array() instead. + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Array(Index dim); + /** constructs an initialized 1x1 Array with the given coefficient + * \sa const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args */ + Array(const Scalar& value); + /** constructs an uninitialized array with \a rows rows and \a cols columns. + * + * This is useful for dynamic-size arrays. For fixed-size arrays, + * it is redundant to pass these parameters, so one should use the default constructor + * Array() instead. */ + Array(Index rows, Index cols); + /** constructs an initialized 2D vector with given coefficients + * \sa Array(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args) */ + Array(const Scalar& val0, const Scalar& val1); +#endif // end EIGEN_PARSED_BY_DOXYGEN + + /** constructs an initialized 3D vector with given coefficients + * \sa Array(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... 
args) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2) { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 3) + m_storage.data()[0] = val0; + m_storage.data()[1] = val1; + m_storage.data()[2] = val2; + } + /** constructs an initialized 4D vector with given coefficients + * \sa Array(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... args) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array(const Scalar& val0, const Scalar& val1, const Scalar& val2, + const Scalar& val3) { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Array, 4) + m_storage.data()[0] = val0; + m_storage.data()[1] = val1; + m_storage.data()[2] = val2; + m_storage.data()[3] = val3; + } + + /** Copy constructor */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Array(const Array&) = default; + + private: + struct PrivateType {}; + + public: + /** \sa MatrixBase::operator=(const EigenBase&) */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Array( + const EigenBase& other, + std::enable_if_t::value, PrivateType> = + PrivateType()) + : Base(other.derived()) {} + + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return 1; } + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return this->innerSize(); } + +#ifdef EIGEN_ARRAY_PLUGIN +#include EIGEN_ARRAY_PLUGIN +#endif + + private: + template + friend struct internal::matrix_swap_impl; +}; + +/** \defgroup arraytypedefs Global array typedefs + * \ingroup Core_Module + * + * %Eigen defines several typedef shortcuts for most common 1D and 2D array types. + * + * The general patterns are the following: + * + * \c ArrayRowsColsType where \c Rows and \c Cols can be \c 2,\c 3,\c 4 for fixed size square matrices or \c X for + * dynamic size, and where \c Type can be \c i for integer, \c f for float, \c d for double, \c cf for complex float, \c + * cd for complex double. 
+ * + * For example, \c Array33d is a fixed-size 3x3 array type of doubles, and \c ArrayXXf is a dynamic-size matrix of + * floats. + * + * There are also \c ArraySizeType which are self-explanatory. For example, \c Array4cf is + * a fixed-size 1D array of 4 complex floats. + * + * With \cpp11, template alias are also defined for common sizes. + * They follow the same pattern as above except that the scalar type suffix is replaced by a + * template parameter, i.e.: + * - `ArrayRowsCols` where `Rows` and `Cols` can be \c 2,\c 3,\c 4, or \c X for fixed or dynamic size. + * - `ArraySize` where `Size` can be \c 2,\c 3,\c 4 or \c X for fixed or dynamic size 1D arrays. + * + * \sa class Array + */ + +#define EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \ + /** \ingroup arraytypedefs */ \ + typedef Array Array##SizeSuffix##SizeSuffix##TypeSuffix; \ + /** \ingroup arraytypedefs */ \ + typedef Array Array##SizeSuffix##TypeSuffix; + +#define EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \ + /** \ingroup arraytypedefs */ \ + typedef Array Array##Size##X##TypeSuffix; \ + /** \ingroup arraytypedefs */ \ + typedef Array Array##X##Size##TypeSuffix; + +#define EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \ + EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 2, 2) \ + EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 3, 3) \ + EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, 4, 4) \ + EIGEN_MAKE_ARRAY_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \ + EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \ + EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \ + EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Type, TypeSuffix, 4) + +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(int, i) +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(float, f) +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(double, d) +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex, cf) +EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES(std::complex, cd) + +#undef EIGEN_MAKE_ARRAY_TYPEDEFS_ALL_SIZES +#undef EIGEN_MAKE_ARRAY_TYPEDEFS +#undef 
EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS + +#define EIGEN_MAKE_ARRAY_TYPEDEFS(Size, SizeSuffix) \ + /** \ingroup arraytypedefs */ \ + /** \brief \cpp11 */ \ + template \ + using Array##SizeSuffix##SizeSuffix = Array; \ + /** \ingroup arraytypedefs */ \ + /** \brief \cpp11 */ \ + template \ + using Array##SizeSuffix = Array; + +#define EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(Size) \ + /** \ingroup arraytypedefs */ \ + /** \brief \cpp11 */ \ + template \ + using Array##Size##X = Array; \ + /** \ingroup arraytypedefs */ \ + /** \brief \cpp11 */ \ + template \ + using Array##X##Size = Array; + +EIGEN_MAKE_ARRAY_TYPEDEFS(2, 2) +EIGEN_MAKE_ARRAY_TYPEDEFS(3, 3) +EIGEN_MAKE_ARRAY_TYPEDEFS(4, 4) +EIGEN_MAKE_ARRAY_TYPEDEFS(Dynamic, X) +EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(2) +EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(3) +EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS(4) + +#undef EIGEN_MAKE_ARRAY_TYPEDEFS +#undef EIGEN_MAKE_ARRAY_FIXED_TYPEDEFS + +#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, SizeSuffix) \ + using Eigen::Matrix##SizeSuffix##TypeSuffix; \ + using Eigen::Vector##SizeSuffix##TypeSuffix; \ + using Eigen::RowVector##SizeSuffix##TypeSuffix; + +#define EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(TypeSuffix) \ + EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 2) \ + EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 3) \ + EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, 4) \ + EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE_AND_SIZE(TypeSuffix, X) + +#define EIGEN_USING_ARRAY_TYPEDEFS \ + EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(i) \ + EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(f) \ + EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(d) \ + EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cf) \ + EIGEN_USING_ARRAY_TYPEDEFS_FOR_TYPE(cd) + +} // end namespace Eigen + +#endif // EIGEN_ARRAY_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/ArrayBase.h b/o-voxel/third_party/eigen/Eigen/src/Core/ArrayBase.h new file mode 100644 index 0000000000000000000000000000000000000000..2a2284a67d6ae908b3401a1767c966aba7bfc026 --- 
/dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/ArrayBase.h @@ -0,0 +1,213 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ARRAYBASE_H +#define EIGEN_ARRAYBASE_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +template +class MatrixWrapper; + +/** \class ArrayBase + * \ingroup Core_Module + * + * \brief Base class for all 1D and 2D array, and related expressions + * + * An array is similar to a dense vector or matrix. While matrices are mathematical + * objects with well defined linear algebra operators, an array is just a collection + * of scalar values arranged in a one or two dimensional fashion. As the main consequence, + * all operations applied to an array are performed coefficient wise. Furthermore, + * arrays support scalar math functions of the c++ standard library (e.g., std::sin(x)), and convenient + * constructors allowing to easily write generic code working for both scalar values + * and arrays. + * + * This class is the base that is inherited by all array expression types. + * + * \tparam Derived is the derived type, e.g., an array or an expression type. + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_ARRAYBASE_PLUGIN. + * + * \sa class MatrixBase, \ref TopicClassHierarchy + */ +template +class ArrayBase : public DenseBase { + public: +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** The base class for a given storage type. 
*/ + typedef ArrayBase StorageBaseType; + + typedef ArrayBase Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + + typedef DenseBase Base; + using Base::ColsAtCompileTime; + using Base::Flags; + using Base::IsVectorAtCompileTime; + using Base::MaxColsAtCompileTime; + using Base::MaxRowsAtCompileTime; + using Base::MaxSizeAtCompileTime; + using Base::RowsAtCompileTime; + using Base::SizeAtCompileTime; + + using Base::coeff; + using Base::coeffRef; + using Base::cols; + using Base::const_cast_derived; + using Base::derived; + using Base::lazyAssign; + using Base::rows; + using Base::size; + using Base::operator-; + using Base::operator=; + using Base::operator+=; + using Base::operator-=; + using Base::operator*=; + using Base::operator/=; + + typedef typename Base::CoeffReturnType CoeffReturnType; + + typedef typename Base::PlainObject PlainObject; + + /** \internal Represents a matrix with all coefficients equal to one another*/ + typedef CwiseNullaryOp, PlainObject> ConstantReturnType; +#endif // not EIGEN_PARSED_BY_DOXYGEN + +#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::ArrayBase +#define EIGEN_DOC_UNARY_ADDONS(X, Y) +#include "../plugins/MatrixCwiseUnaryOps.inc" +#include "../plugins/ArrayCwiseUnaryOps.inc" +#include "../plugins/CommonCwiseBinaryOps.inc" +#include "../plugins/MatrixCwiseBinaryOps.inc" +#include "../plugins/ArrayCwiseBinaryOps.inc" +#ifdef EIGEN_ARRAYBASE_PLUGIN +#include EIGEN_ARRAYBASE_PLUGIN +#endif +#undef EIGEN_CURRENT_STORAGE_BASE_CLASS +#undef EIGEN_DOC_UNARY_ADDONS + + /** Special case of the template operator=, in order to prevent the compiler + * from generating a default operator= (issue hit with g++ 4.1) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const ArrayBase& other) { + 
internal::call_assignment(derived(), other.derived()); + return derived(); + } + + /** Set all the entries to \a value. + * \sa DenseBase::setConstant(), DenseBase::fill() */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const Scalar& value) { + Base::setConstant(value); + return derived(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator+=(const Scalar& other) { + internal::call_assignment(this->derived(), PlainObject::Constant(rows(), cols(), other), + internal::add_assign_op()); + return derived(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const Scalar& other) { + internal::call_assignment(this->derived(), PlainObject::Constant(rows(), cols(), other), + internal::sub_assign_op()); + return derived(); + } + + /** replaces \c *this by \c *this + \a other. + * + * \returns a reference to \c *this + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator+=(const ArrayBase& other) { + call_assignment(derived(), other.derived(), internal::add_assign_op()); + return derived(); + } + + /** replaces \c *this by \c *this - \a other. + * + * \returns a reference to \c *this + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const ArrayBase& other) { + call_assignment(derived(), other.derived(), internal::sub_assign_op()); + return derived(); + } + + /** replaces \c *this by \c *this * \a other coefficient wise. + * + * \returns a reference to \c *this + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*=(const ArrayBase& other) { + call_assignment(derived(), other.derived(), internal::mul_assign_op()); + return derived(); + } + + /** replaces \c *this by \c *this / \a other coefficient wise. 
+ * + * \returns a reference to \c *this + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator/=(const ArrayBase& other) { + call_assignment(derived(), other.derived(), internal::div_assign_op()); + return derived(); + } + + public: + EIGEN_DEVICE_FUNC ArrayBase& array() { return *this; } + EIGEN_DEVICE_FUNC const ArrayBase& array() const { return *this; } + + /** \returns an \link Eigen::MatrixBase Matrix \endlink expression of this array + * \sa MatrixBase::array() */ + EIGEN_DEVICE_FUNC MatrixWrapper matrix() { return MatrixWrapper(derived()); } + EIGEN_DEVICE_FUNC const MatrixWrapper matrix() const { + return MatrixWrapper(derived()); + } + + // template + // inline void evalTo(Dest& dst) const { dst = matrix(); } + + protected: + EIGEN_DEFAULT_COPY_CONSTRUCTOR(ArrayBase) + EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(ArrayBase) + + private: + explicit ArrayBase(Index); + ArrayBase(Index, Index); + template + explicit ArrayBase(const ArrayBase&); + + protected: + // mixing arrays and matrices is not legal + template + Derived& operator+=(const MatrixBase&) { + EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar)) == -1, + YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); + return *this; + } + // mixing arrays and matrices is not legal + template + Derived& operator-=(const MatrixBase&) { + EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar)) == -1, + YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); + return *this; + } +}; + +} // end namespace Eigen + +#endif // EIGEN_ARRAYBASE_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/ArrayWrapper.h b/o-voxel/third_party/eigen/Eigen/src/Core/ArrayWrapper.h new file mode 100644 index 0000000000000000000000000000000000000000..c43346ee55fe2a777c279cd43d044c31a05579ab --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/ArrayWrapper.h @@ -0,0 +1,165 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ARRAYWRAPPER_H +#define EIGEN_ARRAYWRAPPER_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +/** \class ArrayWrapper + * \ingroup Core_Module + * + * \brief Expression of a mathematical vector or matrix as an array object + * + * This class is the return type of MatrixBase::array(), and most of the time + * this is the only way it is use. + * + * \sa MatrixBase::array(), class MatrixWrapper + */ + +namespace internal { +template +struct traits > : public traits > { + typedef ArrayXpr XprKind; + // Let's remove NestByRefBit + enum { + Flags0 = traits >::Flags, + LvalueBitFlag = is_lvalue::value ? LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag + }; +}; +} // namespace internal + +template +class ArrayWrapper : public ArrayBase > { + public: + typedef ArrayBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(ArrayWrapper) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ArrayWrapper) + typedef internal::remove_all_t NestedExpression; + + typedef std::conditional_t::value, Scalar, const Scalar> + ScalarWithConstIfNotLvalue; + + typedef typename internal::ref_selector::non_const_type NestedExpressionType; + + using Base::coeffRef; + + EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE ArrayWrapper(ExpressionType& matrix) : m_expression(matrix) {} + + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_expression.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_expression.cols(); } + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return m_expression.innerStride(); } + + EIGEN_DEVICE_FUNC 
constexpr ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } + EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_expression.data(); } + + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { + return m_expression.coeffRef(rowId, colId); + } + + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { return m_expression.coeffRef(index); } + + template + EIGEN_DEVICE_FUNC inline void evalTo(Dest& dst) const { + dst = m_expression; + } + + EIGEN_DEVICE_FUNC const internal::remove_all_t& nestedExpression() const { + return m_expression; + } + + /** Forwards the resizing request to the nested expression + * \sa DenseBase::resize(Index) */ + EIGEN_DEVICE_FUNC void resize(Index newSize) { m_expression.resize(newSize); } + /** Forwards the resizing request to the nested expression + * \sa DenseBase::resize(Index,Index)*/ + EIGEN_DEVICE_FUNC void resize(Index rows, Index cols) { m_expression.resize(rows, cols); } + + protected: + NestedExpressionType m_expression; +}; + +/** \class MatrixWrapper + * \ingroup Core_Module + * + * \brief Expression of an array as a mathematical vector or matrix + * + * This class is the return type of ArrayBase::matrix(), and most of the time + * this is the only way it is use. + * + * \sa MatrixBase::matrix(), class ArrayWrapper + */ + +namespace internal { +template +struct traits > : public traits > { + typedef MatrixXpr XprKind; + // Let's remove NestByRefBit + enum { + Flags0 = traits >::Flags, + LvalueBitFlag = is_lvalue::value ? 
LvalueBit : 0, + Flags = (Flags0 & ~(NestByRefBit | LvalueBit)) | LvalueBitFlag + }; +}; +} // namespace internal + +template +class MatrixWrapper : public MatrixBase > { + public: + typedef MatrixBase > Base; + EIGEN_DENSE_PUBLIC_INTERFACE(MatrixWrapper) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(MatrixWrapper) + typedef internal::remove_all_t NestedExpression; + + typedef std::conditional_t::value, Scalar, const Scalar> + ScalarWithConstIfNotLvalue; + + typedef typename internal::ref_selector::non_const_type NestedExpressionType; + + using Base::coeffRef; + + EIGEN_DEVICE_FUNC explicit inline MatrixWrapper(ExpressionType& matrix) : m_expression(matrix) {} + + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_expression.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_expression.cols(); } + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return m_expression.innerStride(); } + + EIGEN_DEVICE_FUNC constexpr ScalarWithConstIfNotLvalue* data() { return m_expression.data(); } + EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_expression.data(); } + + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { + return m_expression.derived().coeffRef(rowId, colId); + } + + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { return m_expression.coeffRef(index); } + + EIGEN_DEVICE_FUNC const internal::remove_all_t& nestedExpression() const { + return m_expression; + } + + /** Forwards the resizing request to the nested expression + * \sa DenseBase::resize(Index) */ + EIGEN_DEVICE_FUNC void resize(Index newSize) { m_expression.resize(newSize); } + /** Forwards the resizing request to the nested expression + * \sa DenseBase::resize(Index,Index)*/ + EIGEN_DEVICE_FUNC void resize(Index rows, Index cols) { m_expression.resize(rows, cols); } + + protected: + 
NestedExpressionType m_expression; +}; + +} // end namespace Eigen + +#endif // EIGEN_ARRAYWRAPPER_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Assign.h b/o-voxel/third_party/eigen/Eigen/src/Core/Assign.h new file mode 100644 index 0000000000000000000000000000000000000000..e5f14009b35fa20b06c0f6143ab9d1fbe21abdbc --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Assign.h @@ -0,0 +1,80 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007 Michael Olbrich +// Copyright (C) 2006-2010 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_ASSIGN_H +#define EIGEN_ASSIGN_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::lazyAssign(const DenseBase& other) { + enum { SameType = internal::is_same::value }; + + EIGEN_STATIC_ASSERT_LVALUE(Derived) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Derived, OtherDerived) + EIGEN_STATIC_ASSERT( + SameType, + YOU_MIXED_DIFFERENT_NUMERIC_TYPES__YOU_NEED_TO_USE_THE_CAST_METHOD_OF_MATRIXBASE_TO_CAST_NUMERIC_TYPES_EXPLICITLY) + + eigen_assert(rows() == other.rows() && cols() == other.cols()); + internal::call_assignment_no_alias(derived(), other.derived()); + + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) { + internal::call_assignment(derived(), other.derived()); + return derived(); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator=(const DenseBase& other) { + internal::call_assignment(derived(), other.derived()); + return derived(); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& 
MatrixBase::operator=(const MatrixBase& other) { + internal::call_assignment(derived(), other.derived()); + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const DenseBase& other) { + internal::call_assignment(derived(), other.derived()); + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=(const EigenBase& other) { + internal::call_assignment(derived(), other.derived()); + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::operator=( + const ReturnByValue& other) { + other.derived().evalTo(derived()); + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_ASSIGN_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/AssignEvaluator.h b/o-voxel/third_party/eigen/Eigen/src/Core/AssignEvaluator.h new file mode 100644 index 0000000000000000000000000000000000000000..cec573adeb9224b92c1dd7385c2c847f8ed95002 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/AssignEvaluator.h @@ -0,0 +1,1057 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Benoit Jacob +// Copyright (C) 2011-2014 Gael Guennebaud +// Copyright (C) 2011-2012 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_ASSIGN_EVALUATOR_H +#define EIGEN_ASSIGN_EVALUATOR_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +// This implementation is based on Assign.h + +namespace internal { + +/*************************************************************************** + * Part 1 : the logic deciding a strategy for traversal and unrolling * + ***************************************************************************/ + +// copy_using_evaluator_traits is based on assign_traits + +template +struct copy_using_evaluator_traits { + using Src = typename SrcEvaluator::XprType; + using Dst = typename DstEvaluator::XprType; + using DstScalar = typename Dst::Scalar; + + static constexpr int DstFlags = DstEvaluator::Flags; + static constexpr int SrcFlags = SrcEvaluator::Flags; + + public: + static constexpr int DstAlignment = DstEvaluator::Alignment; + static constexpr int SrcAlignment = SrcEvaluator::Alignment; + static constexpr int JointAlignment = plain_enum_min(DstAlignment, SrcAlignment); + static constexpr bool DstHasDirectAccess = bool(DstFlags & DirectAccessBit); + static constexpr bool SrcIsRowMajor = bool(SrcFlags & RowMajorBit); + static constexpr bool DstIsRowMajor = bool(DstFlags & RowMajorBit); + static constexpr bool IsVectorAtCompileTime = Dst::IsVectorAtCompileTime; + static constexpr int RowsAtCompileTime = size_prefer_fixed(Src::RowsAtCompileTime, Dst::RowsAtCompileTime); + static constexpr int ColsAtCompileTime = size_prefer_fixed(Src::ColsAtCompileTime, Dst::ColsAtCompileTime); + static constexpr int SizeAtCompileTime = size_at_compile_time(RowsAtCompileTime, ColsAtCompileTime); + static constexpr int MaxRowsAtCompileTime = + min_size_prefer_fixed(Src::MaxRowsAtCompileTime, Dst::MaxRowsAtCompileTime); + static constexpr int MaxColsAtCompileTime = + min_size_prefer_fixed(Src::MaxColsAtCompileTime, Dst::MaxColsAtCompileTime); + static constexpr int MaxSizeAtCompileTime = + min_size_prefer_fixed(Src::MaxSizeAtCompileTime, 
Dst::MaxSizeAtCompileTime); + static constexpr int InnerSizeAtCompileTime = IsVectorAtCompileTime ? SizeAtCompileTime + : DstIsRowMajor ? ColsAtCompileTime + : RowsAtCompileTime; + static constexpr int MaxInnerSizeAtCompileTime = IsVectorAtCompileTime ? MaxSizeAtCompileTime + : DstIsRowMajor ? MaxColsAtCompileTime + : MaxRowsAtCompileTime; + static constexpr int RestrictedInnerSize = min_size_prefer_fixed(MaxInnerSizeAtCompileTime, MaxPacketSize); + static constexpr int RestrictedLinearSize = min_size_prefer_fixed(MaxSizeAtCompileTime, MaxPacketSize); + static constexpr int OuterStride = outer_stride_at_compile_time::ret; + + // TODO distinguish between linear traversal and inner-traversals + using LinearPacketType = typename find_best_packet::type; + using InnerPacketType = typename find_best_packet::type; + + static constexpr int LinearPacketSize = unpacket_traits::size; + static constexpr int InnerPacketSize = unpacket_traits::size; + + public: + static constexpr int LinearRequiredAlignment = unpacket_traits::alignment; + static constexpr int InnerRequiredAlignment = unpacket_traits::alignment; + + private: + static constexpr bool StorageOrdersAgree = DstIsRowMajor == SrcIsRowMajor; + static constexpr bool MightVectorize = StorageOrdersAgree && bool(DstFlags & SrcFlags & ActualPacketAccessBit) && + bool(functor_traits::PacketAccess); + static constexpr bool MayInnerVectorize = MightVectorize && (InnerSizeAtCompileTime != Dynamic) && + (InnerSizeAtCompileTime % InnerPacketSize == 0) && + (OuterStride != Dynamic) && (OuterStride % InnerPacketSize == 0) && + (EIGEN_UNALIGNED_VECTORIZE || JointAlignment >= InnerRequiredAlignment); + static constexpr bool MayLinearize = StorageOrdersAgree && (DstFlags & SrcFlags & LinearAccessBit); + static constexpr bool MayLinearVectorize = + MightVectorize && MayLinearize && DstHasDirectAccess && + (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment) || MaxSizeAtCompileTime == Dynamic) && + (MaxSizeAtCompileTime 
== Dynamic || MaxSizeAtCompileTime >= LinearPacketSize); + /* If the destination isn't aligned, we have to do runtime checks and we don't unroll, + so it's only good for large enough sizes. */ + static constexpr int InnerSizeThreshold = (EIGEN_UNALIGNED_VECTORIZE ? 1 : 3) * InnerPacketSize; + static constexpr bool MaySliceVectorize = + MightVectorize && DstHasDirectAccess && + (MaxInnerSizeAtCompileTime == Dynamic || MaxInnerSizeAtCompileTime >= InnerSizeThreshold); + /* slice vectorization can be slow, so we only want it if the slices are big, which is + indicated by InnerMaxSize rather than InnerSize, think of the case of a dynamic block + in a fixed-size matrix + However, with EIGEN_UNALIGNED_VECTORIZE and unrolling, slice vectorization is still worth it */ + + public: + static constexpr int Traversal = SizeAtCompileTime == 0 ? AllAtOnceTraversal + : (MayLinearVectorize && (LinearPacketSize > InnerPacketSize)) + ? LinearVectorizedTraversal + : MayInnerVectorize ? InnerVectorizedTraversal + : MayLinearVectorize ? LinearVectorizedTraversal + : MaySliceVectorize ? SliceVectorizedTraversal + : MayLinearize ? LinearTraversal + : DefaultTraversal; + static constexpr bool Vectorized = Traversal == InnerVectorizedTraversal || Traversal == LinearVectorizedTraversal || + Traversal == SliceVectorizedTraversal; + + using PacketType = std::conditional_t; + + private: + static constexpr int ActualPacketSize = Vectorized ? 
unpacket_traits::size : 1; + static constexpr int UnrollingLimit = EIGEN_UNROLLING_LIMIT * ActualPacketSize; + static constexpr int CoeffReadCost = int(DstEvaluator::CoeffReadCost) + int(SrcEvaluator::CoeffReadCost); + static constexpr bool MayUnrollCompletely = + (SizeAtCompileTime != Dynamic) && (SizeAtCompileTime * CoeffReadCost <= UnrollingLimit); + static constexpr bool MayUnrollInner = + (InnerSizeAtCompileTime != Dynamic) && (InnerSizeAtCompileTime * CoeffReadCost <= UnrollingLimit); + + public: + static constexpr int Unrolling = + (Traversal == InnerVectorizedTraversal || Traversal == DefaultTraversal) + ? (MayUnrollCompletely ? CompleteUnrolling + : MayUnrollInner ? InnerUnrolling + : NoUnrolling) + : Traversal == LinearVectorizedTraversal + ? (MayUnrollCompletely && (EIGEN_UNALIGNED_VECTORIZE || (DstAlignment >= LinearRequiredAlignment)) + ? CompleteUnrolling + : NoUnrolling) + : Traversal == LinearTraversal ? (MayUnrollCompletely ? CompleteUnrolling : NoUnrolling) +#if EIGEN_UNALIGNED_VECTORIZE + : Traversal == SliceVectorizedTraversal ? (MayUnrollInner ? 
InnerUnrolling : NoUnrolling) +#endif + : NoUnrolling; + static constexpr bool UsePacketSegment = has_packet_segment::value; + +#ifdef EIGEN_DEBUG_ASSIGN + static void debug() { + std::cerr << "DstXpr: " << typeid(typename DstEvaluator::XprType).name() << std::endl; + std::cerr << "SrcXpr: " << typeid(typename SrcEvaluator::XprType).name() << std::endl; + std::cerr.setf(std::ios::hex, std::ios::basefield); + std::cerr << "DstFlags" + << " = " << DstFlags << " (" << demangle_flags(DstFlags) << " )" << std::endl; + std::cerr << "SrcFlags" + << " = " << SrcFlags << " (" << demangle_flags(SrcFlags) << " )" << std::endl; + std::cerr.unsetf(std::ios::hex); + EIGEN_DEBUG_VAR(DstAlignment) + EIGEN_DEBUG_VAR(SrcAlignment) + EIGEN_DEBUG_VAR(LinearRequiredAlignment) + EIGEN_DEBUG_VAR(InnerRequiredAlignment) + EIGEN_DEBUG_VAR(JointAlignment) + EIGEN_DEBUG_VAR(InnerSizeAtCompileTime) + EIGEN_DEBUG_VAR(MaxInnerSizeAtCompileTime) + EIGEN_DEBUG_VAR(LinearPacketSize) + EIGEN_DEBUG_VAR(InnerPacketSize) + EIGEN_DEBUG_VAR(ActualPacketSize) + EIGEN_DEBUG_VAR(StorageOrdersAgree) + EIGEN_DEBUG_VAR(MightVectorize) + EIGEN_DEBUG_VAR(MayLinearize) + EIGEN_DEBUG_VAR(MayInnerVectorize) + EIGEN_DEBUG_VAR(MayLinearVectorize) + EIGEN_DEBUG_VAR(MaySliceVectorize) + std::cerr << "Traversal" + << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl; + EIGEN_DEBUG_VAR(SrcEvaluator::CoeffReadCost) + EIGEN_DEBUG_VAR(DstEvaluator::CoeffReadCost) + EIGEN_DEBUG_VAR(Dst::SizeAtCompileTime) + EIGEN_DEBUG_VAR(UnrollingLimit) + EIGEN_DEBUG_VAR(MayUnrollCompletely) + EIGEN_DEBUG_VAR(MayUnrollInner) + std::cerr << "Unrolling" + << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl; + std::cerr << std::endl; + } +#endif +}; + +/*************************************************************************** + * Part 2 : meta-unrollers + ***************************************************************************/ + +/************************ +*** Default 
traversal *** +************************/ + +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling { + static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime; + static constexpr int Inner = Index_ % Kernel::AssignmentTraits::InnerSizeAtCompileTime; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { + kernel.assignCoeffByOuterInner(Outer, Inner); + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_DefaultTraversal_CompleteUnrolling { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {} +}; + +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer) { + kernel.assignCoeffByOuterInner(outer, Index_); + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); + } +}; + +template +struct copy_using_evaluator_DefaultTraversal_InnerUnrolling { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {} +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { + kernel.assignCoeff(Index_); + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_LinearTraversal_CompleteUnrolling { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {} +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template +struct copy_using_evaluator_innervec_CompleteUnrolling { + using PacketType = typename Kernel::PacketType; + static constexpr int Outer = Index_ / Kernel::AssignmentTraits::InnerSizeAtCompileTime; + static constexpr int Inner = Index_ % 
Kernel::AssignmentTraits::InnerSizeAtCompileTime; + static constexpr int NextIndex = Index_ + unpacket_traits::size; + static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment; + static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { + kernel.template assignPacketByOuterInner(Outer, Inner); + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_innervec_CompleteUnrolling { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {} +}; + +template +struct copy_using_evaluator_innervec_InnerUnrolling { + using PacketType = typename Kernel::PacketType; + static constexpr int NextIndex = Index_ + unpacket_traits::size; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) { + kernel.template assignPacketByOuterInner(outer, Index_); + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, + outer); + } +}; + +template +struct copy_using_evaluator_innervec_InnerUnrolling { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {} +}; + +template +struct copy_using_evaluator_innervec_segment { + using PacketType = typename Kernel::PacketType; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel, Index outer) { + kernel.template assignPacketSegmentByOuterInner(outer, Start, 0, + Stop - Start); + } +}; + +template +struct copy_using_evaluator_innervec_segment + : copy_using_evaluator_DefaultTraversal_InnerUnrolling {}; + +template +struct copy_using_evaluator_innervec_segment { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {} +}; + +template +struct copy_using_evaluator_innervec_segment { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&, Index) {} +}; + +/*************************************************************************** + * Part 3 : 
implementation of all cases + ***************************************************************************/ + +// dense_assignment_loop is based on assign_impl + +template +struct dense_assignment_loop_impl; + +template +struct dense_assignment_loop { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { +#ifdef __cpp_lib_is_constant_evaluated + if (internal::is_constant_evaluated()) + dense_assignment_loop_impl::run(kernel); + else +#endif + dense_assignment_loop_impl::run(kernel); + } +}; + +/************************ +***** Special Cases ***** +************************/ + +// Zero-sized assignment is a no-op. +template +struct dense_assignment_loop_impl { + static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime; + + EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE constexpr run(Kernel& /*kernel*/) { + EIGEN_STATIC_ASSERT(SizeAtCompileTime == 0, EIGEN_INTERNAL_ERROR_PLEASE_FILE_A_BUG_REPORT) + } +}; + +/************************ +*** Default traversal *** +************************/ + +template +struct dense_assignment_loop_impl { + EIGEN_DEVICE_FUNC static void EIGEN_STRONG_INLINE constexpr run(Kernel& kernel) { + for (Index outer = 0; outer < kernel.outerSize(); ++outer) { + for (Index inner = 0; inner < kernel.innerSize(); ++inner) { + kernel.assignCoeffByOuterInner(outer, inner); + } + } + } +}; + +template +struct dense_assignment_loop_impl { + static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { + copy_using_evaluator_DefaultTraversal_CompleteUnrolling::run(kernel); + } +}; + +template +struct dense_assignment_loop_impl { + static constexpr int InnerSizeAtCompileTime = Kernel::AssignmentTraits::InnerSizeAtCompileTime; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { + const Index outerSize = kernel.outerSize(); + for (Index outer = 0; outer < 
outerSize; ++outer) + copy_using_evaluator_DefaultTraversal_InnerUnrolling::run(kernel, outer); + } +}; + +/*************************** +*** Linear vectorization *** +***************************/ + +// The goal of unaligned_dense_assignment_loop is simply to factorize the handling +// of the non vectorizable beginning and ending parts + +template +struct unaligned_dense_assignment_loop { + // if Skip == true, then do nothing + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& /*kernel*/, Index /*start*/, Index /*end*/) {} + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& /*kernel*/, Index /*outer*/, + Index /*innerStart*/, Index /*innerEnd*/) {} +}; + +template +struct unaligned_dense_assignment_loop { + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index start, Index end) { + Index count = end - start; + eigen_assert(count <= unpacket_traits::size); + if (count > 0) kernel.template assignPacketSegment(start, 0, count); + } + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer, Index start, Index end) { + Index count = end - start; + eigen_assert(count <= unpacket_traits::size); + if (count > 0) + kernel.template assignPacketSegmentByOuterInner(outer, start, 0, count); + } +}; + +template +struct unaligned_dense_assignment_loop { + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index start, Index end) { + for (Index index = start; index < end; ++index) kernel.assignCoeff(index); + } + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Index outer, Index innerStart, + Index innerEnd) { + for (Index inner = innerStart; inner < innerEnd; ++inner) kernel.assignCoeffByOuterInner(outer, inner); + } +}; + +template +struct copy_using_evaluator_linearvec_CompleteUnrolling { + using PacketType = typename Kernel::PacketType; + 
static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment; + static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment; + static constexpr int NextIndex = Index_ + unpacket_traits::size; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { + kernel.template assignPacket(Index_); + copy_using_evaluator_linearvec_CompleteUnrolling::run(kernel); + } +}; + +template +struct copy_using_evaluator_linearvec_CompleteUnrolling { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {} +}; + +template +struct copy_using_evaluator_linearvec_segment { + using PacketType = typename Kernel::PacketType; + static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment; + static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { + kernel.template assignPacketSegment(Index_, 0, Stop - Index_); + } +}; + +template +struct copy_using_evaluator_linearvec_segment + : copy_using_evaluator_LinearTraversal_CompleteUnrolling {}; + +template +struct copy_using_evaluator_linearvec_segment { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {} +}; + +template +struct copy_using_evaluator_linearvec_segment { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel&) {} +}; + +template +struct dense_assignment_loop_impl { + using Scalar = typename Kernel::Scalar; + using PacketType = typename Kernel::PacketType; + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment; + static constexpr int DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar)); + static constexpr int RequestedAlignment = unpacket_traits::alignment; + static constexpr bool Alignable = + (DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0); + static 
constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment; + static constexpr bool DstIsAligned = DstAlignment >= Alignment; + static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment; + + using head_loop = + unaligned_dense_assignment_loop; + using tail_loop = unaligned_dense_assignment_loop; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { + const Index size = kernel.size(); + const Index alignedStart = DstIsAligned ? 0 : first_aligned(kernel.dstDataPtr(), size); + const Index alignedEnd = alignedStart + numext::round_down(size - alignedStart, PacketSize); + + head_loop::run(kernel, 0, alignedStart); + + for (Index index = alignedStart; index < alignedEnd; index += PacketSize) + kernel.template assignPacket(index); + + tail_loop::run(kernel, alignedEnd, size); + } +}; + +template +struct dense_assignment_loop_impl { + using PacketType = typename Kernel::PacketType; + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int Size = Kernel::AssignmentTraits::SizeAtCompileTime; + static constexpr int AlignedSize = numext::round_down(Size, PacketSize); + static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { + copy_using_evaluator_linearvec_CompleteUnrolling::run(kernel); + copy_using_evaluator_linearvec_segment::run(kernel); + } +}; + +/************************** +*** Inner vectorization *** +**************************/ + +template +struct dense_assignment_loop_impl { + using PacketType = typename Kernel::PacketType; + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment; + static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { + const Index innerSize = 
kernel.innerSize(); + const Index outerSize = kernel.outerSize(); + for (Index outer = 0; outer < outerSize; ++outer) + for (Index inner = 0; inner < innerSize; inner += PacketSize) + kernel.template assignPacketByOuterInner(outer, inner); + } +}; + +template +struct dense_assignment_loop_impl { + static constexpr int SizeAtCompileTime = Kernel::AssignmentTraits::SizeAtCompileTime; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { + copy_using_evaluator_innervec_CompleteUnrolling::run(kernel); + } +}; + +template +struct dense_assignment_loop_impl { + static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime; + static constexpr int SrcAlignment = Kernel::AssignmentTraits::SrcAlignment; + static constexpr int DstAlignment = Kernel::AssignmentTraits::DstAlignment; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(Kernel& kernel) { + const Index outerSize = kernel.outerSize(); + for (Index outer = 0; outer < outerSize; ++outer) + copy_using_evaluator_innervec_InnerUnrolling::run(kernel, + outer); + } +}; + +/*********************** +*** Linear traversal *** +***********************/ + +template +struct dense_assignment_loop_impl { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { + const Index size = kernel.size(); + for (Index i = 0; i < size; ++i) kernel.assignCoeff(i); + } +}; + +template +struct dense_assignment_loop_impl { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { + copy_using_evaluator_LinearTraversal_CompleteUnrolling::run( + kernel); + } +}; + +/************************** +*** Slice vectorization *** +***************************/ + +template +struct dense_assignment_loop_impl { + using Scalar = typename Kernel::Scalar; + using PacketType = typename Kernel::PacketType; + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int SrcAlignment = Kernel::AssignmentTraits::JointAlignment; + static constexpr int 
DstAlignment = plain_enum_max(Kernel::AssignmentTraits::DstAlignment, alignof(Scalar)); + static constexpr int RequestedAlignment = unpacket_traits::alignment; + static constexpr bool Alignable = + (DstAlignment >= RequestedAlignment) || ((RequestedAlignment - DstAlignment) % sizeof(Scalar) == 0); + static constexpr int Alignment = Alignable ? RequestedAlignment : DstAlignment; + static constexpr bool DstIsAligned = DstAlignment >= Alignment; + static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment; + + using head_loop = unaligned_dense_assignment_loop; + using tail_loop = unaligned_dense_assignment_loop; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { + const Scalar* dst_ptr = kernel.dstDataPtr(); + const Index innerSize = kernel.innerSize(); + const Index outerSize = kernel.outerSize(); + const Index alignedStep = Alignable ? (PacketSize - kernel.outerStride() % PacketSize) % PacketSize : 0; + Index alignedStart = ((!Alignable) || DstIsAligned) ? 
0 : internal::first_aligned(dst_ptr, innerSize); + + for (Index outer = 0; outer < outerSize; ++outer) { + const Index alignedEnd = alignedStart + numext::round_down(innerSize - alignedStart, PacketSize); + + head_loop::run(kernel, outer, 0, alignedStart); + + // do the vectorizable part of the assignment + for (Index inner = alignedStart; inner < alignedEnd; inner += PacketSize) + kernel.template assignPacketByOuterInner(outer, inner); + + tail_loop::run(kernel, outer, alignedEnd, innerSize); + + alignedStart = numext::mini((alignedStart + alignedStep) % PacketSize, innerSize); + } + } +}; + +#if EIGEN_UNALIGNED_VECTORIZE +template +struct dense_assignment_loop_impl { + using PacketType = typename Kernel::PacketType; + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int InnerSize = Kernel::AssignmentTraits::InnerSizeAtCompileTime; + static constexpr int VectorizableSize = numext::round_down(InnerSize, PacketSize); + static constexpr bool UsePacketSegment = Kernel::AssignmentTraits::UsePacketSegment; + + using packet_loop = copy_using_evaluator_innervec_InnerUnrolling; + using packet_segment_loop = copy_using_evaluator_innervec_segment; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel) { + for (Index outer = 0; outer < kernel.outerSize(); ++outer) { + packet_loop::run(kernel, outer); + packet_segment_loop::run(kernel, outer); + } + } +}; +#endif + +/*************************************************************************** + * Part 4 : Generic dense assignment kernel + ***************************************************************************/ + +// This class generalizes the assignment of a coefficient (or packet) from one dense evaluator +// to another dense writable evaluator. +// It is parametrized by the two evaluators, and the actual assignment functor. +// This abstraction level makes it possible to keep the evaluation loops as simple and as generic as possible. 
+// One can customize the assignment using this generic dense_assignment_kernel with different +// functors, or by completely overloading it, by-passing a functor. +template +class generic_dense_assignment_kernel { + protected: + typedef typename DstEvaluatorTypeT::XprType DstXprType; + typedef typename SrcEvaluatorTypeT::XprType SrcXprType; + + public: + typedef DstEvaluatorTypeT DstEvaluatorType; + typedef SrcEvaluatorTypeT SrcEvaluatorType; + typedef typename DstEvaluatorType::Scalar Scalar; + typedef copy_using_evaluator_traits AssignmentTraits; + typedef typename AssignmentTraits::PacketType PacketType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr generic_dense_assignment_kernel(DstEvaluatorType& dst, + const SrcEvaluatorType& src, + const Functor& func, + DstXprType& dstExpr) + : m_dst(dst), m_src(src), m_functor(func), m_dstExpr(dstExpr) { +#ifdef EIGEN_DEBUG_ASSIGN + AssignmentTraits::debug(); +#endif + } + + EIGEN_DEVICE_FUNC constexpr Index size() const noexcept { return m_dstExpr.size(); } + EIGEN_DEVICE_FUNC constexpr Index innerSize() const noexcept { return m_dstExpr.innerSize(); } + EIGEN_DEVICE_FUNC constexpr Index outerSize() const noexcept { return m_dstExpr.outerSize(); } + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_dstExpr.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_dstExpr.cols(); } + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return m_dstExpr.outerStride(); } + + EIGEN_DEVICE_FUNC DstEvaluatorType& dstEvaluator() noexcept { return m_dst; } + EIGEN_DEVICE_FUNC const SrcEvaluatorType& srcEvaluator() const noexcept { return m_src; } + + /// Assign src(row,col) to dst(row,col) through the assignment functor. 
+ EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeff(Index row, Index col) { + m_functor.assignCoeff(m_dst.coeffRef(row, col), m_src.coeff(row, col)); + } + + /// \sa assignCoeff(Index,Index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignCoeff(Index index) { + m_functor.assignCoeff(m_dst.coeffRef(index), m_src.coeff(index)); + } + + /// \sa assignCoeff(Index,Index) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void assignCoeffByOuterInner(Index outer, Index inner) { + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignCoeff(row, col); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) { + m_functor.template assignPacket(&m_dst.coeffRef(row, col), + m_src.template packet(row, col)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacket(Index index) { + m_functor.template assignPacket(&m_dst.coeffRef(index), m_src.template packet(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) { + Index row = rowIndexByOuterInner(outer, inner); + Index col = colIndexByOuterInner(outer, inner); + assignPacket(row, col); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegment(Index row, Index col, Index begin, Index count) { + m_functor.template assignPacketSegment( + &m_dst.coeffRef(row, col), m_src.template packetSegment(row, col, begin, count), begin, + count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegment(Index index, Index begin, Index count) { + m_functor.template assignPacketSegment( + &m_dst.coeffRef(index), m_src.template packetSegment(index, begin, count), begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void assignPacketSegmentByOuterInner(Index outer, Index inner, Index begin, + Index count) { + Index row = rowIndexByOuterInner(outer, inner); + Index col = 
colIndexByOuterInner(outer, inner); + assignPacketSegment(row, col, begin, count); + } + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index rowIndexByOuterInner(Index outer, Index inner) { + typedef typename DstEvaluatorType::ExpressionTraits Traits; + return int(Traits::RowsAtCompileTime) == 1 ? 0 + : int(Traits::ColsAtCompileTime) == 1 ? inner + : int(DstEvaluatorType::Flags) & RowMajorBit ? outer + : inner; + } + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr Index colIndexByOuterInner(Index outer, Index inner) { + typedef typename DstEvaluatorType::ExpressionTraits Traits; + return int(Traits::ColsAtCompileTime) == 1 ? 0 + : int(Traits::RowsAtCompileTime) == 1 ? inner + : int(DstEvaluatorType::Flags) & RowMajorBit ? inner + : outer; + } + + EIGEN_DEVICE_FUNC const Scalar* dstDataPtr() const { return m_dstExpr.data(); } + + protected: + DstEvaluatorType& m_dst; + const SrcEvaluatorType& m_src; + const Functor& m_functor; + // TODO find a way to avoid the needs of the original expression + DstXprType& m_dstExpr; +}; + +// Special kernel used when computing small products whose operands have dynamic dimensions. It ensures that the +// PacketSize used is no larger than 4, thereby increasing the chance that vectorized instructions will be used +// when computing the product. 
+ +template +class restricted_packet_dense_assignment_kernel + : public generic_dense_assignment_kernel { + protected: + typedef generic_dense_assignment_kernel Base; + + public: + typedef typename Base::Scalar Scalar; + typedef typename Base::DstXprType DstXprType; + typedef copy_using_evaluator_traits AssignmentTraits; + typedef typename AssignmentTraits::PacketType PacketType; + + EIGEN_DEVICE_FUNC restricted_packet_dense_assignment_kernel(DstEvaluatorTypeT& dst, const SrcEvaluatorTypeT& src, + const Functor& func, DstXprType& dstExpr) + : Base(dst, src, func, dstExpr) {} +}; + +/*************************************************************************** + * Part 5 : Entry point for dense rectangular assignment + ***************************************************************************/ + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprType& dst, const SrcXprType& src, + const Functor& /*func*/) { + EIGEN_ONLY_USED_FOR_DEBUG(dst); + EIGEN_ONLY_USED_FOR_DEBUG(src); + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize_if_allowed(DstXprType& dst, const SrcXprType& src, + const internal::assign_op& /*func*/) { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if (((dst.rows() != dstRows) || (dst.cols() != dstCols))) dst.resize(dstRows, dstCols); + eigen_assert(dst.rows() == dstRows && dst.cols() == dstCols); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, + const Functor& func) { + typedef evaluator DstEvaluatorType; + typedef evaluator SrcEvaluatorType; + + SrcEvaluatorType srcEvaluator(src); + + // NOTE To properly handle A = (A*A.transpose())/s with A rectangular, + // we need to resize the destination after the source evaluator has been created. 
+ resize_if_allowed(dst, src, func); + + DstEvaluatorType dstEvaluator(dst); + + typedef generic_dense_assignment_kernel Kernel; + Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); + + dense_assignment_loop::run(kernel); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src) { + call_dense_assignment_loop(dst, src, internal::assign_op()); +} + +/*************************************************************************** + * Part 6 : Generic assignment + ***************************************************************************/ + +// Based on the respective shapes of the destination and source, +// the class AssignmentKind determine the kind of assignment mechanism. +// AssignmentKind must define a Kind typedef. +template +struct AssignmentKind; + +// Assignment kind defined in this file: +struct Dense2Dense {}; +struct EigenBase2EigenBase {}; + +template +struct AssignmentKind { + typedef EigenBase2EigenBase Kind; +}; +template <> +struct AssignmentKind { + typedef Dense2Dense Kind; +}; + +// This is the main assignment class +template ::Shape, + typename evaluator_traits::Shape>::Kind, + typename EnableIf = void> +struct Assignment; + +// The only purpose of this call_assignment() function is to deal with noalias() / "assume-aliasing" and automatic +// transposition. Indeed, I (Gael) think that this concept of "assume-aliasing" was a mistake, and it makes thing quite +// complicated. So this intermediate function removes everything related to "assume-aliasing" such that Assignment does +// not has to bother about these annoying details. 
+ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(Dst& dst, const Src& src) { + call_assignment(dst, src, internal::assign_op()); +} +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_assignment(const Dst& dst, const Src& src) { + call_assignment(dst, src, internal::assign_op()); +} + +// Deal with "assume-aliasing" +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment( + Dst& dst, const Src& src, const Func& func, std::enable_if_t::value, void*> = 0) { + typename plain_matrix_type::type tmp(src); + call_assignment_no_alias(dst, tmp, func); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment( + Dst& dst, const Src& src, const Func& func, std::enable_if_t::value, void*> = 0) { + call_assignment_no_alias(dst, src, func); +} + +// by-pass "assume-aliasing" +// When there is no aliasing, we require that 'dst' has been properly resized +template class StorageBase, typename Src, typename Func> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment(NoAlias& dst, const Src& src, + const Func& func) { + call_assignment_no_alias(dst.expression(), src, func); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Dst& dst, const Src& src, + const Func& func) { + enum { + NeedToTranspose = ((int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) || + (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)) && + int(Dst::SizeAtCompileTime) != 1 + }; + + typedef std::conditional_t, Dst> ActualDstTypeCleaned; + typedef std::conditional_t, Dst&> ActualDstType; + ActualDstType actualDst(dst); + + // TODO check whether this is the right place to perform these checks: + EIGEN_STATIC_ASSERT_LVALUE(Dst) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned, Src) + EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename ActualDstTypeCleaned::Scalar, typename Src::Scalar); + + Assignment::run(actualDst, src, 
func); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_restricted_packet_assignment_no_alias(Dst& dst, const Src& src, + const Func& func) { + typedef evaluator DstEvaluatorType; + typedef evaluator SrcEvaluatorType; + typedef restricted_packet_dense_assignment_kernel Kernel; + + EIGEN_STATIC_ASSERT_LVALUE(Dst) + EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename Dst::Scalar, typename Src::Scalar); + + SrcEvaluatorType srcEvaluator(src); + resize_if_allowed(dst, src, func); + + DstEvaluatorType dstEvaluator(dst); + Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); + + dense_assignment_loop::run(kernel); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(Dst& dst, const Src& src) { + call_assignment_no_alias(dst, src, internal::assign_op()); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src, + const Func& func) { + // TODO check whether this is the right place to perform these checks: + EIGEN_STATIC_ASSERT_LVALUE(Dst) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Dst, Src) + EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename Dst::Scalar, typename Src::Scalar); + + Assignment::run(dst, src, func); +} +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias_no_transpose(Dst& dst, const Src& src) { + call_assignment_no_alias_no_transpose(dst, src, internal::assign_op()); +} + +// forward declaration +template +EIGEN_DEVICE_FUNC void check_for_aliasing(const Dst& dst, const Src& src); + +// Generic Dense to Dense assignment +// Note that the last template argument "Weak" is needed to make it possible to perform +// both partial specialization+SFINAE without ambiguous specialization +template +struct Assignment { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE constexpr void run(DstXprType& dst, const SrcXprType& src, + const Functor& func) { +#ifndef EIGEN_NO_DEBUG + if 
(!internal::is_constant_evaluated()) { + internal::check_for_aliasing(dst, src); + } +#endif + + call_dense_assignment_loop(dst, src, func); + } +}; + +template +struct Assignment, SrcPlainObject>, + assign_op, Dense2Dense, Weak> { + using Scalar = typename DstXprType::Scalar; + using NullaryOp = scalar_constant_op; + using SrcXprType = CwiseNullaryOp; + using Functor = assign_op; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, + const Functor& /*func*/) { + eigen_fill_impl::run(dst, src); + } +}; + +template +struct Assignment, SrcPlainObject>, + assign_op, Dense2Dense, Weak> { + using Scalar = typename DstXprType::Scalar; + using NullaryOp = scalar_zero_op; + using SrcXprType = CwiseNullaryOp; + using Functor = assign_op; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, + const Functor& /*func*/) { + eigen_zero_impl::run(dst, src); + } +}; + +// Generic assignment through evalTo. +// TODO: not sure we have to keep that one, but it helps porting current code to new evaluator mechanism. +// Note that the last template argument "Weak" is needed to make it possible to perform +// both partial specialization+SFINAE without ambiguous specialization +template +struct Assignment { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run( + DstXprType& dst, const SrcXprType& src, + const internal::assign_op& /*func*/) { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols); + + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + src.evalTo(dst); + } + + // NOTE The following two functions are templated to avoid their instantiation if not needed + // This is needed because some expressions supports evalTo only and/or have 'void' as scalar type. 
+ template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run( + DstXprType& dst, const SrcXprType& src, + const internal::add_assign_op& /*func*/) { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols); + + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + src.addTo(dst); + } + + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run( + DstXprType& dst, const SrcXprType& src, + const internal::sub_assign_op& /*func*/) { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols); + + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + src.subTo(dst); + } +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_ASSIGN_EVALUATOR_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Assign_AOCL.h b/o-voxel/third_party/eigen/Eigen/src/Core/Assign_AOCL.h new file mode 100644 index 0000000000000000000000000000000000000000..a6963474403c79df99f0fb074276f5016f6cc5e6 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Assign_AOCL.h @@ -0,0 +1,301 @@ +/* + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at https://mozilla.org/MPL/2.0/. + * + * Assign_AOCL.h - AOCL Vectorized Math Dispatch Layer for Eigen + * + * Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. + * + * Description: + * ------------ + * This file implements a high-performance dispatch layer that automatically + * routes Eigen's element-wise mathematical operations to AMD Optimizing CPU + * Libraries (AOCL) Vector Math Library (VML) functions when beneficial for + * performance. 
+ * + * The dispatch system uses C++ template specialization to intercept Eigen's + * assignment operations and redirect them to AOCL's VRDA functions, which + * provide optimized implementations for AMD Zen architectures. + * + * Key Features: + * ------------- + * 1. Automatic Dispatch: Seamlessly routes supported operations to AOCL without + * requiring code changes in user applications + * + * 2. Performance Optimization: Uses AOCL VRDA functions optimized for Zen + * family processors with automatic SIMD instruction selection (AVX2, AVX-512) + * + * 3. Threshold-Based Activation: Only activates for vectors larger than + * EIGEN_AOCL_VML_THRESHOLD (default: 128 elements) to avoid overhead on + * small vectors + * + * 4. Precision-Specific Handling: + * - Double precision: AOCL VRDA vectorized functions + * - Single precision: Scalar fallback (preserves correctness) + * + * 5. Memory Layout Compatibility: Ensures direct memory access and compatible + * storage orders between source and destination for optimal performance + * + * Supported Operations: + * --------------------- + * UNARY OPERATIONS (vector → vector): + * - Transcendental: exp(), sin(), cos(), sqrt(), log(), log10(), log2() + * + * BINARY OPERATIONS (vector op vector → vector): + * - Arithmetic: +, *, pow() + * + * Template Specialization Mechanism: + * ----------------------------------- + * The system works by specializing Eigen's Assignment template for: + * 1. CwiseUnaryOp with scalar_*_op functors (unary operations) + * 2. CwiseBinaryOp with scalar_*_op functors (binary operations) + * 3. 
Dense2Dense assignment context with AOCL-compatible traits + * + * Dispatch conditions (all must be true): + * - Source and destination have DirectAccessBit (contiguous memory) + * - Compatible storage orders (both row-major or both column-major) + * - Vector size ≥ EIGEN_AOCL_VML_THRESHOLD or Dynamic size + * - Supported data type (currently double precision for VRDA) + * + * Integration Example: + * -------------------- + * // Standard Eigen code - no changes required + * VectorXd x = VectorXd::Random(10000); + * VectorXd y = VectorXd::Random(10000); + * VectorXd result; + * + * // These operations are automatically dispatched to AOCL: + * result = x.array().exp(); // → amd_vrda_exp() + * result = x.array().sin(); // → amd_vrda_sin() + * result = x.array() + y.array(); // → amd_vrda_add() + * result = x.array().pow(y.array()); // → amd_vrda_pow() + * + * Configuration: + * -------------- + * Required preprocessor definitions: + * - EIGEN_USE_AOCL_ALL or EIGEN_USE_AOCL_MT: Enable AOCL integration + * - EIGEN_USE_AOCL_VML: Enable Vector Math Library dispatch + * + * Compilation Requirements: + * ------------------------- + * Include paths: + * - AOCL headers: -I${AOCL_ROOT}/include + * - Eigen headers: -I/path/to/eigen + * + * Link libraries: + * - AOCL MathLib: -lamdlibm + * - Standard math: -lm + * + * Compiler flags: + * - Optimization: -O3 (required for inlining) + * - Architecture: -march=znver5 or -march=native + * - Vectorization: -mfma -mavx512f (if supported) + * + * Platform Support: + * ------------------ + * - Primary: Linux x86_64 with AMD Zen family processors + * - Compilers: GCC 8+, Clang 10+, AOCC (recommended) + * - AOCL Version: 4.0+ (with VRDA support) + * + * Error Handling: + * --------------- + * - Graceful fallback to scalar operations for unsupported configurations + * - Compile-time detection of AOCL availability + * - Runtime size and alignment validation with eigen_assert() + * + * Developer: + * ---------- + * Name: Sharad Saurabh 
Bhaskar + * Email: shbhaska@amd.com + * Organization: Advanced Micro Devices, Inc. + */ + + +#ifndef EIGEN_ASSIGN_AOCL_H +#define EIGEN_ASSIGN_AOCL_H + +namespace Eigen { +namespace internal { + +// Traits for unary operations. +template class aocl_assign_traits { +private: + enum { + DstHasDirectAccess = !!(Dst::Flags & DirectAccessBit), + SrcHasDirectAccess = !!(Src::Flags & DirectAccessBit), + StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)), + InnerSize = Dst::IsVectorAtCompileTime ? int(Dst::SizeAtCompileTime) + : (Dst::Flags & RowMajorBit) ? int(Dst::ColsAtCompileTime) + : int(Dst::RowsAtCompileTime), + LargeEnough = + (InnerSize == Dynamic) || (InnerSize >= EIGEN_AOCL_VML_THRESHOLD) + }; + +public: + enum { + EnableAoclVML = DstHasDirectAccess && SrcHasDirectAccess && + StorageOrdersAgree && LargeEnough, + Traversal = LinearTraversal + }; +}; + +// Traits for binary operations (e.g., add, pow). +template +class aocl_assign_binary_traits { +private: + enum { + DstHasDirectAccess = !!(Dst::Flags & DirectAccessBit), + LhsHasDirectAccess = !!(Lhs::Flags & DirectAccessBit), + RhsHasDirectAccess = !!(Rhs::Flags & DirectAccessBit), + StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Lhs::IsRowMajor)) && + (int(Dst::IsRowMajor) == int(Rhs::IsRowMajor)), + InnerSize = Dst::IsVectorAtCompileTime ? int(Dst::SizeAtCompileTime) + : (Dst::Flags & RowMajorBit) ? int(Dst::ColsAtCompileTime) + : int(Dst::RowsAtCompileTime), + LargeEnough = + (InnerSize == Dynamic) || (InnerSize >= EIGEN_AOCL_VML_THRESHOLD) + }; + +public: + enum { + EnableAoclVML = DstHasDirectAccess && LhsHasDirectAccess && + RhsHasDirectAccess && StorageOrdersAgree && LargeEnough + }; +}; + +// Unary operation dispatch for float (scalar fallback). 
+#define EIGEN_AOCL_VML_UNARY_CALL_FLOAT(EIGENOP) \ + template \ + struct Assignment< \ + DstXprType, CwiseUnaryOp, SrcXprNested>, \ + assign_op, Dense2Dense, \ + std::enable_if_t< \ + aocl_assign_traits::EnableAoclVML>> { \ + typedef CwiseUnaryOp, SrcXprNested> \ + SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, \ + const assign_op &) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + Eigen::Index n = dst.size(); \ + if (n <= 0) \ + return; \ + const float *input = \ + reinterpret_cast(src.nestedExpression().data()); \ + float *output = reinterpret_cast(dst.data()); \ + for (Eigen::Index i = 0; i < n; ++i) { \ + output[i] = std::EIGENOP(input[i]); \ + } \ + } \ + }; + +// Unary operation dispatch for double (AOCL vectorized). +#define EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(EIGENOP, AOCLOP) \ + template \ + struct Assignment< \ + DstXprType, CwiseUnaryOp, SrcXprNested>, \ + assign_op, Dense2Dense, \ + std::enable_if_t< \ + aocl_assign_traits::EnableAoclVML>> { \ + typedef CwiseUnaryOp, SrcXprNested> \ + SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, \ + const assign_op &) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + Eigen::Index n = dst.size(); \ + eigen_assert(n <= INT_MAX && "AOCL does not support arrays larger than INT_MAX"); \ + if (n <= 0) \ + return; \ + const double *input = \ + reinterpret_cast(src.nestedExpression().data()); \ + double *output = reinterpret_cast(dst.data()); \ + int aocl_n = internal::convert_index(n); \ + AOCLOP(aocl_n, const_cast(input), output); \ + } \ + }; + +// Instantiate unary calls for float (scalar). +// EIGEN_AOCL_VML_UNARY_CALL_FLOAT(exp) + +// Instantiate unary calls for double (AOCL vectorized). 
+EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(exp2, amd_vrda_exp2) +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(exp, amd_vrda_exp) +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(sin, amd_vrda_sin) +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(cos, amd_vrda_cos) +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(sqrt, amd_vrda_sqrt) +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(cbrt, amd_vrda_cbrt) +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(abs, amd_vrda_fabs) +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(log, amd_vrda_log) +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(log10, amd_vrda_log10) +EIGEN_AOCL_VML_UNARY_CALL_DOUBLE(log2, amd_vrda_log2) + +// Binary operation dispatch for float (scalar fallback). +#define EIGEN_AOCL_VML_BINARY_CALL_FLOAT(EIGENOP, STDFUNC) \ + template \ + struct Assignment< \ + DstXprType, \ + CwiseBinaryOp, LhsXprNested, \ + RhsXprNested>, \ + assign_op, Dense2Dense, \ + std::enable_if_t::EnableAoclVML>> { \ + typedef CwiseBinaryOp, LhsXprNested, \ + RhsXprNested> \ + SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, \ + const assign_op &) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + Eigen::Index n = dst.size(); \ + if (n <= 0) \ + return; \ + const float *lhs = reinterpret_cast(src.lhs().data()); \ + const float *rhs = reinterpret_cast(src.rhs().data()); \ + float *output = reinterpret_cast(dst.data()); \ + for (Eigen::Index i = 0; i < n; ++i) { \ + output[i] = STDFUNC(lhs[i], rhs[i]); \ + } \ + } \ + }; + +// Binary operation dispatch for double (AOCL vectorized). 
+#define EIGEN_AOCL_VML_BINARY_CALL_DOUBLE(EIGENOP, AOCLOP) \ + template \ + struct Assignment< \ + DstXprType, \ + CwiseBinaryOp, LhsXprNested, \ + RhsXprNested>, \ + assign_op, Dense2Dense, \ + std::enable_if_t::EnableAoclVML>> { \ + typedef CwiseBinaryOp, LhsXprNested, \ + RhsXprNested> \ + SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, \ + const assign_op &) { \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + Eigen::Index n = dst.size(); \ + eigen_assert(n <= INT_MAX && "AOCL does not support arrays larger than INT_MAX"); \ + if (n <= 0) \ + return; \ + const double *lhs = reinterpret_cast(src.lhs().data()); \ + const double *rhs = reinterpret_cast(src.rhs().data()); \ + double *output = reinterpret_cast(dst.data()); \ + int aocl_n = internal::convert_index(n); \ + AOCLOP(aocl_n, const_cast(lhs), const_cast(rhs), output); \ + } \ + }; + +// Instantiate binary calls for float (scalar). +// EIGEN_AOCL_VML_BINARY_CALL_FLOAT(sum, std::plus) // Using +// scalar_sum_op for addition EIGEN_AOCL_VML_BINARY_CALL_FLOAT(pow, std::pow) + +// Instantiate binary calls for double (AOCL vectorized). +EIGEN_AOCL_VML_BINARY_CALL_DOUBLE(sum, amd_vrda_add) // Using scalar_sum_op for addition +EIGEN_AOCL_VML_BINARY_CALL_DOUBLE(pow, amd_vrda_pow) +EIGEN_AOCL_VML_BINARY_CALL_DOUBLE(max, amd_vrda_fmax) +EIGEN_AOCL_VML_BINARY_CALL_DOUBLE(min, amd_vrda_fmin) + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_ASSIGN_AOCL_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Assign_MKL.h b/o-voxel/third_party/eigen/Eigen/src/Core/Assign_MKL.h new file mode 100644 index 0000000000000000000000000000000000000000..4c809df6e8804edd249e9669ff187dbde7e049cf --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Assign_MKL.h @@ -0,0 +1,183 @@ +/* + Copyright (c) 2011, Intel Corporation. All rights reserved. 
+ Copyright (C) 2015 Gael Guennebaud + + Redistribution and use in source and binary forms, with or without modification, + are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON + ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + ******************************************************************************** + * Content : Eigen bindings to Intel(R) MKL + * MKL VML support for coefficient-wise unary Eigen expressions like a=b.sin() + ******************************************************************************** +*/ + +#ifndef EIGEN_ASSIGN_VML_H +#define EIGEN_ASSIGN_VML_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template +class vml_assign_traits { + private: + enum { + DstHasDirectAccess = Dst::Flags & DirectAccessBit, + SrcHasDirectAccess = Src::Flags & DirectAccessBit, + StorageOrdersAgree = (int(Dst::IsRowMajor) == int(Src::IsRowMajor)), + InnerSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::SizeAtCompileTime) + : int(Dst::Flags) & RowMajorBit ? int(Dst::ColsAtCompileTime) + : int(Dst::RowsAtCompileTime), + InnerMaxSize = int(Dst::IsVectorAtCompileTime) ? int(Dst::MaxSizeAtCompileTime) + : int(Dst::Flags) & RowMajorBit ? int(Dst::MaxColsAtCompileTime) + : int(Dst::MaxRowsAtCompileTime), + MaxSizeAtCompileTime = Dst::SizeAtCompileTime, + + MightEnableVml = bool(StorageOrdersAgree) && bool(DstHasDirectAccess) && bool(SrcHasDirectAccess) && + Src::InnerStrideAtCompileTime == 1 && Dst::InnerStrideAtCompileTime == 1, + MightLinearize = bool(MightEnableVml) && (int(Dst::Flags) & int(Src::Flags) & LinearAccessBit), + VmlSize = bool(MightLinearize) ? MaxSizeAtCompileTime : InnerMaxSize, + LargeEnough = (VmlSize == Dynamic) || VmlSize >= EIGEN_MKL_VML_THRESHOLD + }; + + public: + enum { EnableVml = MightEnableVml && LargeEnough, Traversal = MightLinearize ? 
LinearTraversal : DefaultTraversal }; +}; + +#define EIGEN_PP_EXPAND(ARG) ARG +#if !defined(EIGEN_FAST_MATH) || (EIGEN_FAST_MATH != 1) +#define EIGEN_VMLMODE_EXPAND_xLA , VML_HA +#else +#define EIGEN_VMLMODE_EXPAND_xLA , VML_LA +#endif + +#define EIGEN_VMLMODE_EXPAND_x_ + +#define EIGEN_VMLMODE_PREFIX_xLA vm +#define EIGEN_VMLMODE_PREFIX_x_ v +#define EIGEN_VMLMODE_PREFIX(VMLMODE) EIGEN_CAT(EIGEN_VMLMODE_PREFIX_x, VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ + template \ + struct Assignment, SrcXprNested>, \ + assign_op, Dense2Dense, \ + std::enable_if_t::EnableVml>> { \ + typedef CwiseUnaryOp, SrcXprNested> SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &func) { \ + resize_if_allowed(dst, src, func); \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + if (vml_assign_traits::Traversal == (int)LinearTraversal) { \ + VMLOP(dst.size(), (const VMLTYPE *)src.nestedExpression().data(), \ + (VMLTYPE *)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \ + } else { \ + const Index outerSize = dst.outerSize(); \ + for (Index outer = 0; outer < outerSize; ++outer) { \ + const EIGENTYPE *src_ptr = src.IsRowMajor ? &(src.nestedExpression().coeffRef(outer, 0)) \ + : &(src.nestedExpression().coeffRef(0, outer)); \ + EIGENTYPE *dst_ptr = dst.IsRowMajor ? 
&(dst.coeffRef(outer, 0)) : &(dst.coeffRef(0, outer)); \ + VMLOP(dst.innerSize(), (const VMLTYPE *)src_ptr, \ + (VMLTYPE *)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \ + } \ + } \ + } \ + }; + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), s##VMLOP), float, float, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), d##VMLOP), double, double, VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), c##VMLOP), scomplex, \ + MKL_Complex8, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALL(EIGENOP, EIGEN_CAT(EIGEN_VMLMODE_PREFIX(VMLMODE), z##VMLOP), dcomplex, \ + MKL_Complex16, VMLMODE) + +#define EIGEN_MKL_VML_DECLARE_UNARY_CALLS(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(EIGENOP, VMLOP, VMLMODE) \ + EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(EIGENOP, VMLOP, VMLMODE) + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sin, Sin, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(asin, Asin, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sinh, Sinh, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cos, Cos, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(acos, Acos, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(cosh, Cosh, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tan, Tan, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(atan, Atan, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(tanh, Tanh, LA) +// EIGEN_MKL_VML_DECLARE_UNARY_CALLS(abs, Abs, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(exp, Exp, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log, Ln, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(log10, Log10, LA) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS(sqrt, Sqrt, _) + +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(square, Sqr, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_CPLX(arg, Arg, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(round, Round, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(floor, 
Floor, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(ceil, Ceil, _) +EIGEN_MKL_VML_DECLARE_UNARY_CALLS_REAL(cbrt, Cbrt, _) + +#define EIGEN_MKL_VML_DECLARE_POW_CALL(EIGENOP, VMLOP, EIGENTYPE, VMLTYPE, VMLMODE) \ + template \ + struct Assignment, SrcXprNested, \ + const CwiseNullaryOp, Plain>>, \ + assign_op, Dense2Dense, \ + std::enable_if_t::EnableVml>> { \ + typedef CwiseBinaryOp, SrcXprNested, \ + const CwiseNullaryOp, Plain>> \ + SrcXprType; \ + static void run(DstXprType &dst, const SrcXprType &src, const assign_op &func) { \ + resize_if_allowed(dst, src, func); \ + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); \ + VMLTYPE exponent = reinterpret_cast(src.rhs().functor().m_other); \ + if (vml_assign_traits::Traversal == LinearTraversal) { \ + VMLOP(dst.size(), (const VMLTYPE *)src.lhs().data(), exponent, \ + (VMLTYPE *)dst.data() EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \ + } else { \ + const Index outerSize = dst.outerSize(); \ + for (Index outer = 0; outer < outerSize; ++outer) { \ + const EIGENTYPE *src_ptr = \ + src.IsRowMajor ? &(src.lhs().coeffRef(outer, 0)) : &(src.lhs().coeffRef(0, outer)); \ + EIGENTYPE *dst_ptr = dst.IsRowMajor ? 
&(dst.coeffRef(outer, 0)) : &(dst.coeffRef(0, outer)); \ + VMLOP(dst.innerSize(), (const VMLTYPE *)src_ptr, exponent, \ + (VMLTYPE *)dst_ptr EIGEN_PP_EXPAND(EIGEN_VMLMODE_EXPAND_x##VMLMODE)); \ + } \ + } \ + } \ + }; + +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmsPowx, float, float, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmdPowx, double, double, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmcPowx, scomplex, MKL_Complex8, LA) +EIGEN_MKL_VML_DECLARE_POW_CALL(pow, vmzPowx, dcomplex, MKL_Complex16, LA) + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_ASSIGN_VML_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/BandMatrix.h b/o-voxel/third_party/eigen/Eigen/src/Core/BandMatrix.h new file mode 100644 index 0000000000000000000000000000000000000000..2e53fa0b5dfdb62281fc380622a429cb4d60d074 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/BandMatrix.h @@ -0,0 +1,338 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_BANDMATRIX_H +#define EIGEN_BANDMATRIX_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template +class BandMatrixBase : public EigenBase { + public: + enum { + Flags = internal::traits::Flags, + CoeffReadCost = internal::traits::CoeffReadCost, + RowsAtCompileTime = internal::traits::RowsAtCompileTime, + ColsAtCompileTime = internal::traits::ColsAtCompileTime, + MaxRowsAtCompileTime = internal::traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = internal::traits::MaxColsAtCompileTime, + Supers = internal::traits::Supers, + Subs = internal::traits::Subs, + Options = internal::traits::Options + }; + typedef typename internal::traits::Scalar Scalar; + typedef Matrix DenseMatrixType; + typedef typename DenseMatrixType::StorageIndex StorageIndex; + typedef typename internal::traits::CoefficientsType CoefficientsType; + typedef EigenBase Base; + + protected: + enum { + DataRowsAtCompileTime = ((Supers != Dynamic) && (Subs != Dynamic)) ? 1 + Supers + Subs : Dynamic, + SizeAtCompileTime = min_size_prefer_dynamic(RowsAtCompileTime, ColsAtCompileTime) + }; + + public: + using Base::cols; + using Base::derived; + using Base::rows; + + /** \returns the number of super diagonals */ + inline Index supers() const { return derived().supers(); } + + /** \returns the number of sub diagonals */ + inline Index subs() const { return derived().subs(); } + + /** \returns an expression of the underlying coefficient matrix */ + inline const CoefficientsType& coeffs() const { return derived().coeffs(); } + + /** \returns an expression of the underlying coefficient matrix */ + inline CoefficientsType& coeffs() { return derived().coeffs(); } + + /** \returns a vector expression of the \a i -th column, + * only the meaningful part is returned. + * \warning the internal storage must be column major. 
*/ + inline Block col(Index i) { + EIGEN_STATIC_ASSERT((int(Options) & int(RowMajor)) == 0, THIS_METHOD_IS_ONLY_FOR_COLUMN_MAJOR_MATRICES); + Index start = 0; + Index len = coeffs().rows(); + if (i <= supers()) { + start = supers() - i; + len = (std::min)(rows(), std::max(0, coeffs().rows() - (supers() - i))); + } else if (i >= rows() - subs()) + len = std::max(0, coeffs().rows() - (i + 1 - rows() + subs())); + return Block(coeffs(), start, i, len, 1); + } + + /** \returns a vector expression of the main diagonal */ + inline Block diagonal() { + return Block(coeffs(), supers(), 0, 1, (std::min)(rows(), cols())); + } + + /** \returns a vector expression of the main diagonal (const version) */ + inline const Block diagonal() const { + return Block(coeffs(), supers(), 0, 1, (std::min)(rows(), cols())); + } + + template + struct DiagonalIntReturnType { + enum { + ReturnOpposite = + (int(Options) & int(SelfAdjoint)) && (((Index) > 0 && Supers == 0) || ((Index) < 0 && Subs == 0)), + Conjugate = ReturnOpposite && NumTraits::IsComplex, + ActualIndex = ReturnOpposite ? -Index : Index, + DiagonalSize = + (RowsAtCompileTime == Dynamic || ColsAtCompileTime == Dynamic) + ? Dynamic + : (ActualIndex < 0 ? 
min_size_prefer_dynamic(ColsAtCompileTime, RowsAtCompileTime + ActualIndex) + : min_size_prefer_dynamic(RowsAtCompileTime, ColsAtCompileTime - ActualIndex)) + }; + typedef Block BuildType; + typedef std::conditional_t, BuildType>, BuildType> + Type; + }; + + /** \returns a vector expression of the \a N -th sub or super diagonal */ + template + inline typename DiagonalIntReturnType::Type diagonal() { + return typename DiagonalIntReturnType::BuildType(coeffs(), supers() - N, (std::max)(0, N), 1, diagonalLength(N)); + } + + /** \returns a vector expression of the \a N -th sub or super diagonal */ + template + inline const typename DiagonalIntReturnType::Type diagonal() const { + return typename DiagonalIntReturnType::BuildType(coeffs(), supers() - N, (std::max)(0, N), 1, diagonalLength(N)); + } + + /** \returns a vector expression of the \a i -th sub or super diagonal */ + inline Block diagonal(Index i) { + eigen_assert((i < 0 && -i <= subs()) || (i >= 0 && i <= supers())); + return Block(coeffs(), supers() - i, std::max(0, i), 1, diagonalLength(i)); + } + + /** \returns a vector expression of the \a i -th sub or super diagonal */ + inline const Block diagonal(Index i) const { + eigen_assert((i < 0 && -i <= subs()) || (i >= 0 && i <= supers())); + return Block(coeffs(), supers() - i, std::max(0, i), 1, + diagonalLength(i)); + } + + template + inline void evalTo(Dest& dst) const { + dst.resize(rows(), cols()); + dst.setZero(); + dst.diagonal() = diagonal(); + for (Index i = 1; i <= supers(); ++i) dst.diagonal(i) = diagonal(i); + for (Index i = 1; i <= subs(); ++i) dst.diagonal(-i) = diagonal(-i); + } + + DenseMatrixType toDenseMatrix() const { + DenseMatrixType res(rows(), cols()); + evalTo(res); + return res; + } + + protected: + inline Index diagonalLength(Index i) const { + return i < 0 ? 
(std::min)(cols(), rows() + i) : (std::min)(rows(), cols() - i); + } +}; + +/** + * \class BandMatrix + * \ingroup Core_Module + * + * \brief Represents a rectangular matrix with a banded storage + * + * \tparam Scalar_ Numeric type, e.g. float, double, int + * \tparam Rows_ Number of rows, or \b Dynamic + * \tparam Cols_ Number of columns, or \b Dynamic + * \tparam Supers_ Number of super diagonals + * \tparam Subs_ Number of sub diagonals + * \tparam Options_ A combination of either \b #RowMajor or \b #ColMajor, and of \b #SelfAdjoint + * The former controls \ref TopicStorageOrders "storage order", and defaults to + * column-major. The latter controls whether the matrix represents a selfadjoint + * matrix, in which case either Supers or Subs has to be zero. + * + * \sa class TridiagonalMatrix + */ + +template +struct traits > { + typedef Scalar_ Scalar; + typedef Dense StorageKind; + typedef Eigen::Index StorageIndex; + enum { + CoeffReadCost = NumTraits::ReadCost, + RowsAtCompileTime = Rows_, + ColsAtCompileTime = Cols_, + MaxRowsAtCompileTime = Rows_, + MaxColsAtCompileTime = Cols_, + Flags = LvalueBit, + Supers = Supers_, + Subs = Subs_, + Options = Options_, + DataRowsAtCompileTime = ((Supers != Dynamic) && (Subs != Dynamic)) ? 
1 + Supers + Subs : Dynamic + }; + typedef Matrix + CoefficientsType; +}; + +template +class BandMatrix : public BandMatrixBase > { + public: + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::StorageIndex StorageIndex; + typedef typename internal::traits::CoefficientsType CoefficientsType; + + explicit inline BandMatrix(Index rows = Rows, Index cols = Cols, Index supers = Supers, Index subs = Subs) + : m_coeffs(1 + supers + subs, cols), m_rows(rows), m_supers(supers), m_subs(subs) {} + + /** \returns the number of rows */ + constexpr Index rows() const { return m_rows.value(); } + + /** \returns the number of columns */ + constexpr Index cols() const { return m_coeffs.cols(); } + + /** \returns the number of super diagonals */ + constexpr Index supers() const { return m_supers.value(); } + + /** \returns the number of sub diagonals */ + constexpr Index subs() const { return m_subs.value(); } + + inline const CoefficientsType& coeffs() const { return m_coeffs; } + inline CoefficientsType& coeffs() { return m_coeffs; } + + protected: + CoefficientsType m_coeffs; + internal::variable_if_dynamic m_rows; + internal::variable_if_dynamic m_supers; + internal::variable_if_dynamic m_subs; +}; + +template +class BandMatrixWrapper; + +template +struct traits > { + typedef typename CoefficientsType_::Scalar Scalar; + typedef typename CoefficientsType_::StorageKind StorageKind; + typedef typename CoefficientsType_::StorageIndex StorageIndex; + enum { + CoeffReadCost = internal::traits::CoeffReadCost, + RowsAtCompileTime = Rows_, + ColsAtCompileTime = Cols_, + MaxRowsAtCompileTime = Rows_, + MaxColsAtCompileTime = Cols_, + Flags = LvalueBit, + Supers = Supers_, + Subs = Subs_, + Options = Options_, + DataRowsAtCompileTime = ((Supers != Dynamic) && (Subs != Dynamic)) ? 
1 + Supers + Subs : Dynamic + }; + typedef CoefficientsType_ CoefficientsType; +}; + +template +class BandMatrixWrapper + : public BandMatrixBase > { + public: + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::CoefficientsType CoefficientsType; + typedef typename internal::traits::StorageIndex StorageIndex; + + explicit inline BandMatrixWrapper(const CoefficientsType& coeffs, Index rows = Rows_, Index cols = Cols_, + Index supers = Supers_, Index subs = Subs_) + : m_coeffs(coeffs), m_rows(rows), m_supers(supers), m_subs(subs) { + EIGEN_UNUSED_VARIABLE(cols); + // eigen_assert(coeffs.cols()==cols() && (supers()+subs()+1)==coeffs.rows()); + } + + /** \returns the number of rows */ + constexpr Index rows() const { return m_rows.value(); } + + /** \returns the number of columns */ + constexpr Index cols() const { return m_coeffs.cols(); } + + /** \returns the number of super diagonals */ + constexpr Index supers() const { return m_supers.value(); } + + /** \returns the number of sub diagonals */ + constexpr Index subs() const { return m_subs.value(); } + + inline const CoefficientsType& coeffs() const { return m_coeffs; } + + protected: + const CoefficientsType& m_coeffs; + internal::variable_if_dynamic m_rows; + internal::variable_if_dynamic m_supers; + internal::variable_if_dynamic m_subs; +}; + +/** + * \class TridiagonalMatrix + * \ingroup Core_Module + * + * \brief Represents a tridiagonal matrix with a compact banded storage + * + * \tparam Scalar Numeric type, e.g. float, double, int + * \tparam Size Number of rows and cols, or \b Dynamic + * \tparam Options Can be 0 or \b SelfAdjoint + * + * \sa class BandMatrix + */ +template +class TridiagonalMatrix : public BandMatrix { + typedef BandMatrix Base; + typedef typename Base::StorageIndex StorageIndex; + + public: + explicit TridiagonalMatrix(Index size = Size) : Base(size, size, Options & SelfAdjoint ? 
0 : 1, 1) {} + + inline typename Base::template DiagonalIntReturnType<1>::Type super() { return Base::template diagonal<1>(); } + inline const typename Base::template DiagonalIntReturnType<1>::Type super() const { + return Base::template diagonal<1>(); + } + inline typename Base::template DiagonalIntReturnType<-1>::Type sub() { return Base::template diagonal<-1>(); } + inline const typename Base::template DiagonalIntReturnType<-1>::Type sub() const { + return Base::template diagonal<-1>(); + } + + protected: +}; + +struct BandShape {}; + +template +struct evaluator_traits > + : public evaluator_traits_base > { + typedef BandShape Shape; +}; + +template +struct evaluator_traits > + : public evaluator_traits_base > { + typedef BandShape Shape; +}; + +template <> +struct AssignmentKind { + typedef EigenBase2EigenBase Kind; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_BANDMATRIX_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Block.h b/o-voxel/third_party/eigen/Eigen/src/Core/Block.h new file mode 100644 index 0000000000000000000000000000000000000000..ab1fa63f4d727340503763d559d0afe8a8cbd209 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Block.h @@ -0,0 +1,429 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_BLOCK_H +#define EIGEN_BLOCK_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { +template +struct traits> : traits { + typedef typename traits::Scalar Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::XprKind XprKind; + typedef typename ref_selector::type XprTypeNested; + typedef std::remove_reference_t XprTypeNested_; + enum { + MatrixRows = traits::RowsAtCompileTime, + MatrixCols = traits::ColsAtCompileTime, + RowsAtCompileTime = MatrixRows == 0 ? 0 : BlockRows, + ColsAtCompileTime = MatrixCols == 0 ? 0 : BlockCols, + MaxRowsAtCompileTime = BlockRows == 0 ? 0 + : RowsAtCompileTime != Dynamic ? int(RowsAtCompileTime) + : int(traits::MaxRowsAtCompileTime), + MaxColsAtCompileTime = BlockCols == 0 ? 0 + : ColsAtCompileTime != Dynamic ? int(ColsAtCompileTime) + : int(traits::MaxColsAtCompileTime), + + XprTypeIsRowMajor = (int(traits::Flags) & RowMajorBit) != 0, + IsRowMajor = (MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1) ? 1 + : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1) ? 0 + : XprTypeIsRowMajor, + HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor), + InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + InnerStrideAtCompileTime = HasSameStorageOrderAsXprType ? int(inner_stride_at_compile_time::ret) + : int(outer_stride_at_compile_time::ret), + OuterStrideAtCompileTime = HasSameStorageOrderAsXprType ? int(outer_stride_at_compile_time::ret) + : int(inner_stride_at_compile_time::ret), + + // FIXME, this traits is rather specialized for dense object and it needs to be cleaned further + FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, + FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, + Flags = (traits::Flags & (DirectAccessBit | (InnerPanel_ ? 
CompressedAccessBit : 0))) | FlagsLvalueBit | + FlagsRowMajorBit, + // FIXME DirectAccessBit should not be handled by expressions + // + // Alignment is needed by MapBase's assertions + // We can safely set it to false here. Internal alignment errors will be detected by an eigen_internal_assert in the + // respective evaluator + Alignment = 0, + InnerPanel = InnerPanel_ ? 1 : 0 + }; +}; + +template ::ret> +class BlockImpl_dense; + +} // end namespace internal + +template +class BlockImpl; + +/** \class Block + * \ingroup Core_Module + * + * \brief Expression of a fixed-size or dynamic-size block + * + * \tparam XprType the type of the expression in which we are taking a block + * \tparam BlockRows the number of rows of the block we are taking at compile time (optional) + * \tparam BlockCols the number of columns of the block we are taking at compile time (optional) + * \tparam InnerPanel is true, if the block maps to a set of rows of a row major matrix or + * to a set of columns of a column major matrix (optional). This parameter makes it possible to determine + * at compile time whether aligned access is possible on the block expression. + * + * This class represents an expression of either a fixed-size or dynamic-size block. It is the return + * type of DenseBase::block(Index,Index,Index,Index) and DenseBase::block(Index,Index) and + * most of the time this is the only way it is used. + * + * However, if you want to directly manipulate block expressions, + * for instance if you want to write a function returning such an expression, you + * will need to use this class. + * + * Here is an example illustrating the dynamic case: + * \include class_Block.cpp + * Output: \verbinclude class_Block.out + * + * \note Even though this expression has dynamic size, in the case where \a XprType + * has fixed size, this expression inherits a fixed maximal size which means that evaluating + * it does not cause a dynamic memory allocation. 
+ * + * Here is an example illustrating the fixed-size case: + * \include class_FixedBlock.cpp + * Output: \verbinclude class_FixedBlock.out + * + * \sa DenseBase::block(Index,Index,Index,Index), DenseBase::block(Index,Index), class VectorBlock + */ +template +class Block + : public BlockImpl::StorageKind> { + typedef BlockImpl::StorageKind> Impl; + using BlockHelper = internal::block_xpr_helper; + + public: + // typedef typename Impl::Base Base; + typedef Impl Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(Block) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Block) + + typedef internal::remove_all_t NestedExpression; + + /** Column or Row constructor + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block(XprType& xpr, Index i) : Impl(xpr, i) { + eigen_assert((i >= 0) && (((BlockRows == 1) && (BlockCols == XprType::ColsAtCompileTime) && i < xpr.rows()) || + ((BlockRows == XprType::RowsAtCompileTime) && (BlockCols == 1) && i < xpr.cols()))); + } + + /** Fixed-size constructor + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block(XprType& xpr, Index startRow, Index startCol) + : Impl(xpr, startRow, startCol) { + EIGEN_STATIC_ASSERT(RowsAtCompileTime != Dynamic && ColsAtCompileTime != Dynamic, + THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE) + eigen_assert(startRow >= 0 && BlockRows >= 0 && startRow + BlockRows <= xpr.rows() && startCol >= 0 && + BlockCols >= 0 && startCol + BlockCols <= xpr.cols()); + } + + /** Dynamic-size constructor + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Block(XprType& xpr, Index startRow, Index startCol, Index blockRows, + Index blockCols) + : Impl(xpr, startRow, startCol, blockRows, blockCols) { + eigen_assert((RowsAtCompileTime == Dynamic || RowsAtCompileTime == blockRows) && + (ColsAtCompileTime == Dynamic || ColsAtCompileTime == blockCols)); + eigen_assert(startRow >= 0 && blockRows >= 0 && startRow <= xpr.rows() - blockRows && startCol >= 0 && + blockCols >= 0 && startCol <= xpr.cols() - blockCols); + } + + // convert nested blocks (e.g. 
Block>) to a simple block expression (Block) + + using ConstUnwindReturnType = Block; + using UnwindReturnType = Block; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ConstUnwindReturnType unwind() const { + return ConstUnwindReturnType(BlockHelper::base(*this), BlockHelper::row(*this, 0), BlockHelper::col(*this, 0), + this->rows(), this->cols()); + } + + template ::value>> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE UnwindReturnType unwind() { + return UnwindReturnType(BlockHelper::base(*this), BlockHelper::row(*this, 0), BlockHelper::col(*this, 0), + this->rows(), this->cols()); + } +}; + +// The generic default implementation for dense block simply forward to the internal::BlockImpl_dense +// that must be specialized for direct and non-direct access... +template +class BlockImpl + : public internal::BlockImpl_dense { + typedef internal::BlockImpl_dense Impl; + typedef typename XprType::StorageIndex StorageIndex; + + public: + typedef Impl Base; + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index i) : Impl(xpr, i) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index startRow, Index startCol) + : Impl(xpr, startRow, startCol) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl(XprType& xpr, Index startRow, Index startCol, Index blockRows, + Index blockCols) + : Impl(xpr, startRow, startCol, blockRows, blockCols) {} +}; + +namespace internal { + +/** \internal Internal implementation of dense Blocks in the general case. 
*/ +template +class BlockImpl_dense : public internal::dense_xpr_base>::type { + typedef Block BlockType; + typedef typename internal::ref_selector::non_const_type XprTypeNested; + + public: + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(BlockType) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense) + + // class InnerIterator; // FIXME apparently never used + + /** Column or Row constructor + */ + EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index i) + : m_xpr(xpr), + // It is a row if and only if BlockRows==1 and BlockCols==XprType::ColsAtCompileTime, + // and it is a column if and only if BlockRows==XprType::RowsAtCompileTime and BlockCols==1, + // all other cases are invalid. + // The case a 1x1 matrix seems ambiguous, but the result is the same anyway. + m_startRow((BlockRows == 1) && (BlockCols == XprType::ColsAtCompileTime) ? i : 0), + m_startCol((BlockRows == XprType::RowsAtCompileTime) && (BlockCols == 1) ? i : 0), + m_blockRows(BlockRows == 1 ? 1 : xpr.rows()), + m_blockCols(BlockCols == 1 ? 
1 : xpr.cols()) {} + + /** Fixed-size constructor + */ + EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol) + : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol), m_blockRows(BlockRows), m_blockCols(BlockCols) {} + + /** Dynamic-size constructor + */ + EIGEN_DEVICE_FUNC inline BlockImpl_dense(XprType& xpr, Index startRow, Index startCol, Index blockRows, + Index blockCols) + : m_xpr(xpr), m_startRow(startRow), m_startCol(startCol), m_blockRows(blockRows), m_blockCols(blockCols) {} + + EIGEN_DEVICE_FUNC inline Index rows() const { return m_blockRows.value(); } + EIGEN_DEVICE_FUNC inline Index cols() const { return m_blockCols.value(); } + + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index rowId, Index colId) { + EIGEN_STATIC_ASSERT_LVALUE(XprType) + return m_xpr.coeffRef(rowId + m_startRow.value(), colId + m_startCol.value()); + } + + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { + return m_xpr.derived().coeffRef(rowId + m_startRow.value(), colId + m_startCol.value()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const { + return m_xpr.coeff(rowId + m_startRow.value(), colId + m_startCol.value()); + } + + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { + EIGEN_STATIC_ASSERT_LVALUE(XprType) + return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + } + + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { + return m_xpr.coeffRef(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + } + + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const { + return m_xpr.coeff(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? 
index : 0)); + } + + template + EIGEN_DEVICE_FUNC inline PacketScalar packet(Index rowId, Index colId) const { + return m_xpr.template packet(rowId + m_startRow.value(), colId + m_startCol.value()); + } + + template + EIGEN_DEVICE_FUNC inline void writePacket(Index rowId, Index colId, const PacketScalar& val) { + m_xpr.template writePacket(rowId + m_startRow.value(), colId + m_startCol.value(), val); + } + + template + EIGEN_DEVICE_FUNC inline PacketScalar packet(Index index) const { + return m_xpr.template packet(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0)); + } + + template + EIGEN_DEVICE_FUNC inline void writePacket(Index index, const PacketScalar& val) { + m_xpr.template writePacket(m_startRow.value() + (RowsAtCompileTime == 1 ? 0 : index), + m_startCol.value() + (RowsAtCompileTime == 1 ? index : 0), val); + } + +#ifdef EIGEN_PARSED_BY_DOXYGEN + /** \sa MapBase::data() */ + EIGEN_DEVICE_FUNC constexpr const Scalar* data() const; + EIGEN_DEVICE_FUNC inline Index innerStride() const; + EIGEN_DEVICE_FUNC inline Index outerStride() const; +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const internal::remove_all_t& nestedExpression() const { + return m_xpr; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE XprType& nestedExpression() { return m_xpr; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr StorageIndex startRow() const noexcept { return m_startRow.value(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr StorageIndex startCol() const noexcept { return m_startCol.value(); } + + protected: + XprTypeNested m_xpr; + const internal::variable_if_dynamic + m_startRow; + const internal::variable_if_dynamic + m_startCol; + const internal::variable_if_dynamic m_blockRows; + const internal::variable_if_dynamic m_blockCols; +}; + +/** \internal Internal implementation of dense Blocks in the direct access case.*/ +template +class BlockImpl_dense + : public MapBase> { + typedef Block 
BlockType; + typedef typename internal::ref_selector::non_const_type XprTypeNested; + enum { XprTypeIsRowMajor = (int(traits::Flags) & RowMajorBit) != 0 }; + + /** \internal Returns base+offset (unless base is null, in which case returns null). + * Adding an offset to nullptr is undefined behavior, so we must avoid it. + */ + template + EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE static Scalar* add_to_nullable_pointer(Scalar* base, Index offset) { + return base != nullptr ? base + offset : nullptr; + } + + public: + typedef MapBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(BlockType) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(BlockImpl_dense) + + /** Column or Row constructor + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl_dense(XprType& xpr, Index i) + : Base((BlockRows == 0 || BlockCols == 0) + ? nullptr + : add_to_nullable_pointer( + xpr.data(), + i * (((BlockRows == 1) && (BlockCols == XprType::ColsAtCompileTime) && (!XprTypeIsRowMajor)) || + ((BlockRows == XprType::RowsAtCompileTime) && (BlockCols == 1) && + (XprTypeIsRowMajor)) + ? xpr.innerStride() + : xpr.outerStride())), + BlockRows == 1 ? 1 : xpr.rows(), BlockCols == 1 ? 1 : xpr.cols()), + m_xpr(xpr), + m_startRow((BlockRows == 1) && (BlockCols == XprType::ColsAtCompileTime) ? i : 0), + m_startCol((BlockRows == XprType::RowsAtCompileTime) && (BlockCols == 1) ? i : 0) { + init(); + } + + /** Fixed-size constructor + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl_dense(XprType& xpr, Index startRow, Index startCol) + : Base((BlockRows == 0 || BlockCols == 0) + ? nullptr + : add_to_nullable_pointer(xpr.data(), + xpr.innerStride() * (XprTypeIsRowMajor ? startCol : startRow) + + xpr.outerStride() * (XprTypeIsRowMajor ? 
startRow : startCol))), + m_xpr(xpr), + m_startRow(startRow), + m_startCol(startCol) { + init(); + } + + /** Dynamic-size constructor + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl_dense(XprType& xpr, Index startRow, Index startCol, Index blockRows, + Index blockCols) + : Base((blockRows == 0 || blockCols == 0) + ? nullptr + : add_to_nullable_pointer(xpr.data(), + xpr.innerStride() * (XprTypeIsRowMajor ? startCol : startRow) + + xpr.outerStride() * (XprTypeIsRowMajor ? startRow : startCol)), + blockRows, blockCols), + m_xpr(xpr), + m_startRow(startRow), + m_startCol(startCol) { + init(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const internal::remove_all_t& nestedExpression() const noexcept { + return m_xpr; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE XprType& nestedExpression() { return m_xpr; } + + /** \sa MapBase::innerStride() */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index innerStride() const noexcept { + return internal::traits::HasSameStorageOrderAsXprType ? m_xpr.innerStride() : m_xpr.outerStride(); + } + + /** \sa MapBase::outerStride() */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index outerStride() const noexcept { + return internal::traits::HasSameStorageOrderAsXprType ? m_xpr.outerStride() : m_xpr.innerStride(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr StorageIndex startRow() const noexcept { return m_startRow.value(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr StorageIndex startCol() const noexcept { return m_startCol.value(); } + +#ifndef __SUNPRO_CC + // FIXME sunstudio is not friendly with the above friend... + // META-FIXME there is no 'friend' keyword around here. Is this obsolete? 
+ protected: +#endif + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal used by allowAligned() */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE BlockImpl_dense(XprType& xpr, const Scalar* data, Index blockRows, + Index blockCols) + : Base(data, blockRows, blockCols), m_xpr(xpr) { + init(); + } +#endif + + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void init() { + m_outerStride = + internal::traits::HasSameStorageOrderAsXprType ? m_xpr.outerStride() : m_xpr.innerStride(); + } + + XprTypeNested m_xpr; + const internal::variable_if_dynamic + m_startRow; + const internal::variable_if_dynamic + m_startCol; + Index m_outerStride; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_BLOCK_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/CommaInitializer.h b/o-voxel/third_party/eigen/Eigen/src/Core/CommaInitializer.h new file mode 100644 index 0000000000000000000000000000000000000000..dc544881241dbd59187a5bd01b01b60e275ca3b7 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/CommaInitializer.h @@ -0,0 +1,149 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_COMMAINITIALIZER_H +#define EIGEN_COMMAINITIALIZER_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +/** \class CommaInitializer + * \ingroup Core_Module + * + * \brief Helper class used by the comma initializer operator + * + * This class is internally used to implement the comma initializer feature. It is + * the return type of MatrixBase::operator<<, and most of the time this is the only + * way it is used. 
+ * + * \sa \blank \ref MatrixBaseCommaInitRef "MatrixBase::operator<<", CommaInitializer::finished() + */ +template +struct CommaInitializer { + typedef typename XprType::Scalar Scalar; + + EIGEN_DEVICE_FUNC inline CommaInitializer(XprType& xpr, const Scalar& s) + : m_xpr(xpr), m_row(0), m_col(1), m_currentBlockRows(1) { + eigen_assert(m_xpr.rows() > 0 && m_xpr.cols() > 0 && "Cannot comma-initialize a 0x0 matrix (operator<<)"); + m_xpr.coeffRef(0, 0) = s; + } + + template + EIGEN_DEVICE_FUNC inline CommaInitializer(XprType& xpr, const DenseBase& other) + : m_xpr(xpr), m_row(0), m_col(other.cols()), m_currentBlockRows(other.rows()) { + eigen_assert(m_xpr.rows() >= other.rows() && m_xpr.cols() >= other.cols() && + "Cannot comma-initialize a 0x0 matrix (operator<<)"); + m_xpr.template block(0, 0, other.rows(), + other.cols()) = other; + } + + /* Copy/Move constructor which transfers ownership. This is crucial in + * absence of return value optimization to avoid assertions during destruction. */ + // FIXME in C++11 mode this could be replaced by a proper RValue constructor + EIGEN_DEVICE_FUNC inline CommaInitializer(const CommaInitializer& o) + : m_xpr(o.m_xpr), m_row(o.m_row), m_col(o.m_col), m_currentBlockRows(o.m_currentBlockRows) { + // Mark original object as finished. 
In absence of R-value references we need to const_cast: + const_cast(o).m_row = m_xpr.rows(); + const_cast(o).m_col = m_xpr.cols(); + const_cast(o).m_currentBlockRows = 0; + } + + /* inserts a scalar value in the target matrix */ + EIGEN_DEVICE_FUNC CommaInitializer &operator,(const Scalar& s) { + if (m_col == m_xpr.cols()) { + m_row += m_currentBlockRows; + m_col = 0; + m_currentBlockRows = 1; + eigen_assert(m_row < m_xpr.rows() && "Too many rows passed to comma initializer (operator<<)"); + } + eigen_assert(m_col < m_xpr.cols() && "Too many coefficients passed to comma initializer (operator<<)"); + eigen_assert(m_currentBlockRows == 1); + m_xpr.coeffRef(m_row, m_col++) = s; + return *this; + } + + /* inserts a matrix expression in the target matrix */ + template + EIGEN_DEVICE_FUNC CommaInitializer &operator,(const DenseBase& other) { + if (m_col == m_xpr.cols() && (other.cols() != 0 || other.rows() != m_currentBlockRows)) { + m_row += m_currentBlockRows; + m_col = 0; + m_currentBlockRows = other.rows(); + eigen_assert(m_row + m_currentBlockRows <= m_xpr.rows() && + "Too many rows passed to comma initializer (operator<<)"); + } + eigen_assert((m_col + other.cols() <= m_xpr.cols()) && + "Too many coefficients passed to comma initializer (operator<<)"); + eigen_assert(m_currentBlockRows == other.rows()); + m_xpr.template block(m_row, m_col, other.rows(), + other.cols()) = other; + m_col += other.cols(); + return *this; + } + + EIGEN_DEVICE_FUNC inline ~CommaInitializer() +#if defined VERIFY_RAISES_ASSERT && (!defined EIGEN_NO_ASSERTION_CHECKING) && defined EIGEN_EXCEPTIONS + noexcept(false) // Eigen::eigen_assert_exception +#endif + { + finished(); + } + + /** \returns the built matrix once all its coefficients have been set. + * Calling finished is 100% optional. 
Its purpose is to write expressions + * like this: + * \code + * quaternion.fromRotationMatrix((Matrix3f() << axis0, axis1, axis2).finished()); + * \endcode + */ + EIGEN_DEVICE_FUNC inline XprType& finished() { + eigen_assert(((m_row + m_currentBlockRows) == m_xpr.rows() || m_xpr.cols() == 0) && m_col == m_xpr.cols() && + "Too few coefficients passed to comma initializer (operator<<)"); + return m_xpr; + } + + XprType& m_xpr; // target expression + Index m_row; // current row id + Index m_col; // current col id + Index m_currentBlockRows; // current block height +}; + +/** \anchor MatrixBaseCommaInitRef + * Convenient operator to set the coefficients of a matrix. + * + * The coefficients must be provided in a row major order and exactly match + * the size of the matrix. Otherwise an assertion is raised. + * + * Example: \include MatrixBase_set.cpp + * Output: \verbinclude MatrixBase_set.out + * + * \note According the c++ standard, the argument expressions of this comma initializer are evaluated in arbitrary + * order. + * + * \sa CommaInitializer::finished(), class CommaInitializer + */ +template +EIGEN_DEVICE_FUNC inline CommaInitializer DenseBase::operator<<(const Scalar& s) { + return CommaInitializer(*static_cast(this), s); +} + +/** \sa operator<<(const Scalar&) */ +template +template +EIGEN_DEVICE_FUNC inline CommaInitializer DenseBase::operator<<( + const DenseBase& other) { + return CommaInitializer(*static_cast(this), other); +} + +} // end namespace Eigen + +#endif // EIGEN_COMMAINITIALIZER_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/ConditionEstimator.h b/o-voxel/third_party/eigen/Eigen/src/Core/ConditionEstimator.h new file mode 100644 index 0000000000000000000000000000000000000000..efd19f8b732abac08faa8d8d9b143113109b96af --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/ConditionEstimator.h @@ -0,0 +1,173 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2016 Rasmus Munk Larsen (rmlarsen@google.com) +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CONDITIONESTIMATOR_H +#define EIGEN_CONDITIONESTIMATOR_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template +struct rcond_compute_sign { + static inline Vector run(const Vector& v) { + const RealVector v_abs = v.cwiseAbs(); + return (v_abs.array() == static_cast(0)) + .select(Vector::Ones(v.size()), v.cwiseQuotient(v_abs)); + } +}; + +// Partial specialization to avoid elementwise division for real vectors. +template +struct rcond_compute_sign { + static inline Vector run(const Vector& v) { + return (v.array() < static_cast(0)) + .select(-Vector::Ones(v.size()), Vector::Ones(v.size())); + } +}; + +/** + * \returns an estimate of ||inv(matrix)||_1 given a decomposition of + * \a matrix that implements .solve() and .adjoint().solve() methods. + * + * This function implements Algorithms 4.1 and 5.1 from + * http://www.maths.manchester.ac.uk/~higham/narep/narep135.pdf + * which also forms the basis for the condition number estimators in + * LAPACK. Since at most 10 calls to the solve method of dec are + * performed, the total cost is O(dims^2), as opposed to O(dims^3) + * needed to compute the inverse matrix explicitly. + * + * The most common usage is in estimating the condition number + * ||matrix||_1 * ||inv(matrix)||_1. The first term ||matrix||_1 can be + * computed directly in O(n^2) operations. + * + * Supports the following decompositions: FullPivLU, PartialPivLU, LDLT, and + * LLT. + * + * \sa FullPivLU, PartialPivLU, LDLT, LLT. 
+ */ +template +typename Decomposition::RealScalar rcond_invmatrix_L1_norm_estimate(const Decomposition& dec) { + typedef typename Decomposition::MatrixType MatrixType; + typedef typename Decomposition::Scalar Scalar; + typedef typename Decomposition::RealScalar RealScalar; + typedef typename internal::plain_col_type::type Vector; + typedef typename internal::plain_col_type::type RealVector; + const bool is_complex = (NumTraits::IsComplex != 0); + + eigen_assert(dec.rows() == dec.cols()); + const Index n = dec.rows(); + if (n == 0) return 0; + + // Disable Index to float conversion warning +#ifdef __INTEL_COMPILER +#pragma warning push +#pragma warning(disable : 2259) +#endif + Vector v = dec.solve(Vector::Ones(n) / Scalar(n)); +#ifdef __INTEL_COMPILER +#pragma warning pop +#endif + + // lower_bound is a lower bound on + // ||inv(matrix)||_1 = sup_v ||inv(matrix) v||_1 / ||v||_1 + // and is the objective maximized by the ("super-") gradient ascent + // algorithm below. + RealScalar lower_bound = v.template lpNorm<1>(); + if (n == 1) return lower_bound; + + // Gradient ascent algorithm follows: We know that the optimum is achieved at + // one of the simplices v = e_i, so in each iteration we follow a + // super-gradient to move towards the optimal one. + RealScalar old_lower_bound = lower_bound; + Vector sign_vector(n); + Vector old_sign_vector; + Index v_max_abs_index = -1; + Index old_v_max_abs_index = v_max_abs_index; + for (int k = 0; k < 4; ++k) { + sign_vector = internal::rcond_compute_sign::run(v); + if (k > 0 && !is_complex && sign_vector == old_sign_vector) { + // Break if the solution stagnated. + break; + } + // v_max_abs_index = argmax |real( inv(matrix)^T * sign_vector )| + v = dec.adjoint().solve(sign_vector); + v.real().cwiseAbs().maxCoeff(&v_max_abs_index); + if (v_max_abs_index == old_v_max_abs_index) { + // Break if the solution stagnated. + break; + } + // Move to the new simplex e_j, where j = v_max_abs_index. 
+ v = dec.solve(Vector::Unit(n, v_max_abs_index)); // v = inv(matrix) * e_j. + lower_bound = v.template lpNorm<1>(); + if (lower_bound <= old_lower_bound) { + // Break if the gradient step did not increase the lower_bound. + break; + } + if (!is_complex) { + old_sign_vector = sign_vector; + } + old_v_max_abs_index = v_max_abs_index; + old_lower_bound = lower_bound; + } + // The following calculates an independent estimate of ||matrix||_1 by + // multiplying matrix by a vector with entries of slowly increasing + // magnitude and alternating sign: + // v_i = (-1)^{i} (1 + (i / (dim-1))), i = 0,...,dim-1. + // This improvement to Hager's algorithm above is due to Higham. It was + // added to make the algorithm more robust in certain corner cases where + // large elements in the matrix might otherwise escape detection due to + // exact cancellation (especially when op and op_adjoint correspond to a + // sequence of backsubstitutions and permutations), which could cause + // Hager's algorithm to vastly underestimate ||matrix||_1. + Scalar alternating_sign(RealScalar(1)); + for (Index i = 0; i < n; ++i) { + // The static_cast is needed when Scalar is a complex and RealScalar implements expression templates + v[i] = alternating_sign * static_cast(RealScalar(1) + (RealScalar(i) / (RealScalar(n - 1)))); + alternating_sign = -alternating_sign; + } + v = dec.solve(v); + const RealScalar alternate_lower_bound = (2 * v.template lpNorm<1>()) / (3 * RealScalar(n)); + return numext::maxi(lower_bound, alternate_lower_bound); +} + +/** \brief Reciprocal condition number estimator. + * + * Computing a decomposition of a dense matrix takes O(n^3) operations, while + * this method estimates the condition number quickly and reliably in O(n^2) + * operations. + * + * \returns an estimate of the reciprocal condition number + * (1 / (||matrix||_1 * ||inv(matrix)||_1)) of matrix, given ||matrix||_1 and + * its decomposition. 
Supports the following decompositions: FullPivLU, + * PartialPivLU, LDLT, and LLT. + * + * \sa FullPivLU, PartialPivLU, LDLT, LLT. + */ +template +typename Decomposition::RealScalar rcond_estimate_helper(typename Decomposition::RealScalar matrix_norm, + const Decomposition& dec) { + typedef typename Decomposition::RealScalar RealScalar; + eigen_assert(dec.rows() == dec.cols()); + if (dec.rows() == 0) return NumTraits::infinity(); + if (numext::is_exactly_zero(matrix_norm)) return RealScalar(0); + if (dec.rows() == 1) return RealScalar(1); + const RealScalar inverse_matrix_norm = rcond_invmatrix_L1_norm_estimate(dec); + return (numext::is_exactly_zero(inverse_matrix_norm) ? RealScalar(0) + : (RealScalar(1) / inverse_matrix_norm) / matrix_norm); +} + +} // namespace internal + +} // namespace Eigen + +#endif diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/CoreEvaluators.h b/o-voxel/third_party/eigen/Eigen/src/Core/CoreEvaluators.h new file mode 100644 index 0000000000000000000000000000000000000000..90c0a98c8a6616703a2b0a2307200d2c8f7af76d --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/CoreEvaluators.h @@ -0,0 +1,2018 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011 Benoit Jacob +// Copyright (C) 2011-2014 Gael Guennebaud +// Copyright (C) 2011-2012 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_COREEVALUATORS_H +#define EIGEN_COREEVALUATORS_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +// This class returns the evaluator kind from the expression storage kind. 
+// Default assumes index based accessors +template +struct storage_kind_to_evaluator_kind { + typedef IndexBased Kind; +}; + +// This class returns the evaluator shape from the expression storage kind. +// It can be Dense, Sparse, Triangular, Diagonal, SelfAdjoint, Band, etc. +template +struct storage_kind_to_shape; + +template <> +struct storage_kind_to_shape { + typedef DenseShape Shape; +}; +template <> +struct storage_kind_to_shape { + typedef SolverShape Shape; +}; +template <> +struct storage_kind_to_shape { + typedef PermutationShape Shape; +}; +template <> +struct storage_kind_to_shape { + typedef TranspositionsShape Shape; +}; + +// Evaluators have to be specialized with respect to various criteria such as: +// - storage/structure/shape +// - scalar type +// - etc. +// Therefore, we need specialization of evaluator providing additional template arguments for each kind of evaluators. +// We currently distinguish the following kind of evaluators: +// - unary_evaluator for expressions taking only one arguments (CwiseUnaryOp, CwiseUnaryView, Transpose, +// MatrixWrapper, ArrayWrapper, Reverse, Replicate) +// - binary_evaluator for expression taking two arguments (CwiseBinaryOp) +// - ternary_evaluator for expression taking three arguments (CwiseTernaryOp) +// - product_evaluator for linear algebra products (Product); special case of binary_evaluator because it requires +// additional tags for dispatching. 
+// - mapbase_evaluator for Map, Block, Ref +// - block_evaluator for Block (special dispatching to a mapbase_evaluator or unary_evaluator) + +template ::Kind, + typename Arg2Kind = typename evaluator_traits::Kind, + typename Arg3Kind = typename evaluator_traits::Kind, + typename Arg1Scalar = typename traits::Scalar, + typename Arg2Scalar = typename traits::Scalar, + typename Arg3Scalar = typename traits::Scalar> +struct ternary_evaluator; + +template ::Kind, + typename RhsKind = typename evaluator_traits::Kind, + typename LhsScalar = typename traits::Scalar, + typename RhsScalar = typename traits::Scalar> +struct binary_evaluator; + +template ::Kind, + typename Scalar = typename T::Scalar> +struct unary_evaluator; + +// evaluator_traits contains traits for evaluator + +template +struct evaluator_traits_base { + // by default, get evaluator kind and shape from storage + typedef typename storage_kind_to_evaluator_kind::StorageKind>::Kind Kind; + typedef typename storage_kind_to_shape::StorageKind>::Shape Shape; +}; + +// Default evaluator traits +template +struct evaluator_traits : public evaluator_traits_base {}; + +template ::Shape> +struct evaluator_assume_aliasing { + static const bool value = false; +}; + +// By default, we assume a unary expression: +template +struct evaluator : public unary_evaluator { + typedef unary_evaluator Base; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const T& xpr) : Base(xpr) {} +}; + +// TODO: Think about const-correctness +template +struct evaluator : evaluator { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const T& xpr) : evaluator(xpr) {} +}; + +// ---------- base class for all evaluators ---------- + +template +struct evaluator_base { + // TODO that's not very nice to have to propagate all these traits. They are currently only needed to handle + // outer,inner indices. 
+ typedef traits ExpressionTraits; + + enum { Alignment = 0 }; + // noncopyable: + // Don't make this class inherit noncopyable as this kills EBO (Empty Base Optimization) + // and make complex evaluator much larger than then should do. + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator_base() = default; + + private: + EIGEN_DEVICE_FUNC evaluator_base(const evaluator_base&); + EIGEN_DEVICE_FUNC const evaluator_base& operator=(const evaluator_base&); +}; + +// -------------------- Matrix and Array -------------------- +// +// evaluator is a common base class for the +// Matrix and Array evaluators. +// Here we directly specialize evaluator. This is not really a unary expression, and it is, by definition, dense, +// so no need for more sophisticated dispatching. + +// this helper permits to completely eliminate m_outerStride if it is known at compiletime. +template +class plainobjectbase_evaluator_data { + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) + : data(ptr) { +#ifndef EIGEN_INTERNAL_DEBUGGING + EIGEN_UNUSED_VARIABLE(outerStride); +#endif + eigen_internal_assert(outerStride == OuterStride); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index outerStride() const noexcept { return OuterStride; } + const Scalar* data; +}; + +template +class plainobjectbase_evaluator_data { + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plainobjectbase_evaluator_data(const Scalar* ptr, Index outerStride) + : data(ptr), m_outerStride(outerStride) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index outerStride() const { return m_outerStride; } + const Scalar* data; + + protected: + Index m_outerStride; +}; + +template +struct evaluator> : evaluator_base { + typedef PlainObjectBase PlainObjectType; + typedef typename PlainObjectType::Scalar Scalar; + typedef typename PlainObjectType::CoeffReturnType CoeffReturnType; + + enum { + IsRowMajor = 
PlainObjectType::IsRowMajor, + IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime, + RowsAtCompileTime = PlainObjectType::RowsAtCompileTime, + ColsAtCompileTime = PlainObjectType::ColsAtCompileTime, + + CoeffReadCost = NumTraits::ReadCost, + Flags = traits::EvaluatorFlags, + Alignment = traits::Alignment + }; + enum { + // We do not need to know the outer stride for vectors + OuterStrideAtCompileTime = IsVectorAtCompileTime ? 0 + : int(IsRowMajor) ? ColsAtCompileTime + : RowsAtCompileTime + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator() : m_d(0, OuterStrideAtCompileTime) { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr explicit evaluator(const PlainObjectType& m) + : m_d(m.data(), IsVectorAtCompileTime ? 0 : m.outerStride()) { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index row, Index col) const { + return coeff(getIndex(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index index) const { return m_d.data[index]; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index row, Index col) { + return coeffRef(getIndex(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index index) { + return const_cast(m_d.data)[index]; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + return packet(getIndex(row, col)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + return ploadt(m_d.data + index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { + writePacket(getIndex(row, col), x); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { + pstoret(const_cast(m_d.data) + index, x); + } + + 
template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + return packetSegment(getIndex(row, col), begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + return ploadtSegment(m_d.data + index, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index row, Index col, const PacketType& x, Index begin, + Index count) { + writePacketSegment(getIndex(row, col), x, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin, + Index count) { + pstoretSegment(const_cast(m_d.data) + index, x, begin, count); + } + + protected: + plainobjectbase_evaluator_data m_d; + + private: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index constexpr getIndex(Index row, Index col) const { + return IsRowMajor ? row * m_d.outerStride() + col : row + col * m_d.outerStride(); + } +}; + +template +struct evaluator> + : evaluator>> { + typedef Matrix XprType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator() = default; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr explicit evaluator(const XprType& m) + : evaluator>(m) {} +}; + +template +struct evaluator> + : evaluator>> { + typedef Array XprType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr evaluator() = default; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr explicit evaluator(const XprType& m) + : evaluator>(m) {} +}; + +// -------------------- Transpose -------------------- + +template +struct unary_evaluator, IndexBased> : evaluator_base> { + typedef Transpose XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + Flags = evaluator::Flags ^ RowMajorBit, + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& t) : m_argImpl(t.nestedExpression()) {} + + 
typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + return m_argImpl.coeff(col, row); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(col, row); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename XprType::Scalar& coeffRef(Index index) { + return m_argImpl.coeffRef(index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + return m_argImpl.template packet(col, row); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + return m_argImpl.template packet(index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { + m_argImpl.template writePacket(col, row, x); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { + m_argImpl.template writePacket(index, x); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + return m_argImpl.template packetSegment(col, row, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + return m_argImpl.template packetSegment(index, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index row, Index col, const PacketType& x, Index begin, + Index count) { + m_argImpl.template writePacketSegment(col, row, x, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin, + Index count) { + m_argImpl.template 
writePacketSegment(index, x, begin, count); + } + + protected: + evaluator m_argImpl; +}; + +// -------------------- CwiseNullaryOp -------------------- +// Like Matrix and Array, this is not really a unary expression, so we directly specialize evaluator. +// Likewise, there is not need to more sophisticated dispatching here. + +template ::value, + bool has_unary = has_unary_operator::value, + bool has_binary = has_binary_operator::value> +struct nullary_wrapper { + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { + return op(i, j); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { + return op(i); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { + return op.template packetOp(i, j); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { + return op.template packetOp(i); + } +}; + +template +struct nullary_wrapper { + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType = 0, IndexType = 0) const { + return op(); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType = 0, IndexType = 0) const { + return op.template packetOp(); + } +}; + +template +struct nullary_wrapper { + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j = 0) const { + return op(i, j); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j = 0) const { + return op.template packetOp(i, j); + } +}; + +// We need the following specialization for vector-only functors assigned to a runtime vector, +// for instance, using linspace and assigning a RowVectorXd to a MatrixXd or even a row of a MatrixXd. 
+// In this case, i==0 and j is used for the actual iteration. +template +struct nullary_wrapper { + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { + eigen_assert(i == 0 || j == 0); + return op(i + j); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { + eigen_assert(i == 0 || j == 0); + return op.template packetOp(i + j); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { + return op(i); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { + return op.template packetOp(i); + } +}; + +template +struct nullary_wrapper {}; + +#if 0 && EIGEN_COMP_MSVC > 0 +// Disable this ugly workaround. This is now handled in traits::match, +// but this piece of code might still become handly if some other weird compilation +// errors pop up again. + +// MSVC exhibits a weird compilation error when +// compiling: +// Eigen::MatrixXf A = MatrixXf::Random(3,3); +// Ref R = 2.f*A; +// and that has_*ary_operator> have not been instantiated yet. +// The "problem" is that evaluator<2.f*A> is instantiated by traits::match<2.f*A> +// and at that time has_*ary_operator returns true regardless of T. +// Then nullary_wrapper is badly instantiated as nullary_wrapper<.,.,true,true,true>. +// The trick is thus to defer the proper instantiation of nullary_wrapper when coeff(), +// and packet() are really instantiated as implemented below: + +// This is a simple wrapper around Index to enforce the re-instantiation of +// has_*ary_operator when needed. 
+template struct nullary_wrapper_workaround_msvc { + nullary_wrapper_workaround_msvc(const T&); + operator T()const; +}; + +template +struct nullary_wrapper +{ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i, IndexType j) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().operator()(op,i,j); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const NullaryOp& op, IndexType i) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().operator()(op,i); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i, IndexType j) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().template packetOp(op,i,j); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE T packetOp(const NullaryOp& op, IndexType i) const { + return nullary_wrapper >::value, + has_unary_operator >::value, + has_binary_operator >::value>().template packetOp(op,i); + } +}; +#endif // MSVC workaround + +template +struct evaluator> + : evaluator_base> { + typedef CwiseNullaryOp XprType; + typedef remove_all_t PlainObjectTypeCleaned; + + enum { + CoeffReadCost = functor_traits::Cost, + + Flags = (evaluator::Flags & + (HereditaryBits | (functor_has_linear_access::ret ? LinearAccessBit : 0) | + (functor_traits::PacketAccess ? PacketAccessBit : 0))) | + (functor_traits::IsRepeatable ? 
0 : EvalBeforeNestingBit), + Alignment = AlignedMax + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& n) : m_functor(n.functor()), m_wrapper() { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(IndexType row, IndexType col) const { + return m_wrapper(m_functor, row, col); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(IndexType index) const { + return m_wrapper(m_functor, index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(IndexType row, IndexType col) const { + return m_wrapper.template packetOp(m_functor, row, col); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(IndexType index) const { + return m_wrapper.template packetOp(m_functor, index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(IndexType row, IndexType col, Index /*begin*/, + Index /*count*/) const { + return packet(row, col); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(IndexType index, Index /*begin*/, + Index /*count*/) const { + return packet(index); + } + + protected: + const NullaryOp m_functor; + const nullary_wrapper m_wrapper; +}; + +// -------------------- CwiseUnaryOp -------------------- + +template +struct unary_evaluator, IndexBased> : evaluator_base> { + typedef CwiseUnaryOp XprType; + + enum { + CoeffReadCost = int(evaluator::CoeffReadCost) + int(functor_traits::Cost), + + Flags = evaluator::Flags & + (HereditaryBits | LinearAccessBit | (functor_traits::PacketAccess ? 
PacketAccessBit : 0)), + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& op) : m_d(op) { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + return m_d.func()(m_d.argImpl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_d.func()(m_d.argImpl.coeff(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + return m_d.func().packetOp(m_d.argImpl.template packet(row, col)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + return m_d.func().packetOp(m_d.argImpl.template packet(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + return m_d.func().packetOp(m_d.argImpl.template packetSegment(row, col, begin, count)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + return m_d.func().packetOp(m_d.argImpl.template packetSegment(index, begin, count)); + } + + protected: + // this helper permits to completely eliminate the functor if it is empty + struct Data { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Data(const XprType& xpr) + : op(xpr.functor()), argImpl(xpr.nestedExpression()) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const UnaryOp& func() const { return op; } + UnaryOp op; + evaluator argImpl; + }; + + Data m_d; +}; + +// ----------------------- Casting --------------------- + +template +struct unary_evaluator, ArgType>, IndexBased> { + using CastOp = core_cast_op; + using XprType = CwiseUnaryOp; + + // Use the largest packet type by default 
+ using SrcPacketType = typename packet_traits::type; + static constexpr int SrcPacketSize = unpacket_traits::size; + static constexpr int SrcPacketBytes = SrcPacketSize * sizeof(SrcType); + + enum { + CoeffReadCost = int(evaluator::CoeffReadCost) + int(functor_traits::Cost), + PacketAccess = functor_traits::PacketAccess, + ActualPacketAccessBit = PacketAccess ? PacketAccessBit : 0, + Flags = evaluator::Flags & (HereditaryBits | LinearAccessBit | ActualPacketAccessBit), + IsRowMajor = (evaluator::Flags & RowMajorBit), + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& xpr) + : m_argImpl(xpr.nestedExpression()), m_rows(xpr.rows()), m_cols(xpr.cols()) { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + template + using AltSrcScalarOp = std::enable_if_t<(unpacket_traits::size < SrcPacketSize && + !find_packet_by_size::size>::value), + bool>; + template + using SrcPacketArgs1 = + std::enable_if_t<(find_packet_by_size::size>::value), bool>; + template + using SrcPacketArgs2 = std::enable_if_t<(unpacket_traits::size) == (2 * SrcPacketSize), bool>; + template + using SrcPacketArgs4 = std::enable_if_t<(unpacket_traits::size) == (4 * SrcPacketSize), bool>; + template + using SrcPacketArgs8 = std::enable_if_t<(unpacket_traits::size) == (8 * SrcPacketSize), bool>; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool check_array_bounds(Index row, Index col, Index begin, Index count) const { + return IsRowMajor ? (col + count + begin <= cols()) : (row + count + begin <= rows()); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool check_array_bounds(Index index, Index begin, Index count) const { + return index + count + begin <= size(); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE SrcType srcCoeff(Index row, Index col, Index offset) const { + Index actualRow = IsRowMajor ? row : row + offset; + Index actualCol = IsRowMajor ? 
col + offset : col; + return m_argImpl.coeff(actualRow, actualCol); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE SrcType srcCoeff(Index index, Index offset) const { + Index actualIndex = index + offset; + return m_argImpl.coeff(actualIndex); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstType coeff(Index row, Index col) const { + return cast(srcCoeff(row, col, 0)); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstType coeff(Index index) const { + return cast(srcCoeff(index, 0)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType srcPacket(Index row, Index col, Index offset) const { + constexpr int PacketSize = unpacket_traits::size; + Index packetOffset = offset * PacketSize; + Index actualRow = IsRowMajor ? row : row + packetOffset; + Index actualCol = IsRowMajor ? col + packetOffset : col; + eigen_assert(check_array_bounds(actualRow, actualCol, 0, PacketSize) && "Array index out of bounds"); + return m_argImpl.template packet(actualRow, actualCol); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType srcPacket(Index index, Index offset) const { + constexpr int PacketSize = unpacket_traits::size; + Index packetOffset = offset * PacketSize; + Index actualIndex = index + packetOffset; + eigen_assert(check_array_bounds(actualIndex, 0, PacketSize) && "Array index out of bounds"); + return m_argImpl.template packet(actualIndex); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType srcPacketSegment(Index row, Index col, Index begin, Index count, + Index offset) const { + constexpr int PacketSize = unpacket_traits::size; + Index packetOffset = offset * PacketSize; + Index actualRow = IsRowMajor ? row : row + packetOffset; + Index actualCol = IsRowMajor ? 
col + packetOffset : col; + eigen_assert(check_array_bounds(actualRow, actualCol, begin, count) && "Array index out of bounds"); + return m_argImpl.template packetSegment(actualRow, actualCol, begin, count); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType srcPacketSegment(Index index, Index begin, Index count, + Index offset) const { + constexpr int PacketSize = unpacket_traits::size; + Index packetOffset = offset * PacketSize; + Index actualIndex = index + packetOffset; + eigen_assert(check_array_bounds(actualIndex, begin, count) && "Array index out of bounds"); + return m_argImpl.template packetSegment(actualIndex, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketBlock srcPacketSegmentHelper(Index row, Index col, + Index begin, + Index count) const { + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + PacketBlock packets; + for (Index i = 0; i < NumPackets; i++) packets.packet[i] = pzero(PacketType()); + Index offset = begin / SrcPacketSize; + Index actualBegin = begin % SrcPacketSize; + for (; offset < NumPackets; offset++) { + Index actualCount = numext::mini(SrcPacketSize - actualBegin, count); + packets.packet[offset] = srcPacketSegment(row, col, actualBegin, actualCount, offset); + if (count == actualCount) break; + actualBegin = 0; + count -= actualCount; + } + return packets; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketBlock srcPacketSegmentHelper(Index index, + Index begin, + Index count) const { + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + PacketBlock packets; + for (Index i = 0; i < NumPackets; i++) packets.packet[i] = pzero(PacketType()); + Index offset = begin / SrcPacketSize; + Index actualBegin = begin % SrcPacketSize; + for (; offset < NumPackets; offset++) { + Index actualCount = numext::mini(SrcPacketSize - actualBegin, count); + packets.packet[offset] = srcPacketSegment(index, actualBegin, actualCount, offset); + if (count == 
actualCount) break; + actualBegin = 0; + count -= actualCount; + } + return packets; + } + + // There is no source packet type with equal or fewer elements than DstPacketType. + // This is problematic as the evaluation loop may attempt to access data outside the bounds of the array. + // For example, consider the cast utilizing pcast with an array of size 4: {0.0f,1.0f,2.0f,3.0f}. + // The first iteration of the evaluation loop will load 16 bytes: {0.0f,1.0f,2.0f,3.0f} and cast to {0.0,1.0}, which + // is acceptable. The second iteration will load 16 bytes: {2.0f,3.0f,?,?}, which is outside the bounds of the array. + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { + constexpr int DstPacketSize = unpacket_traits::size; + constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); + constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode); + return pcast(srcPacketSegment(row, col, 0, DstPacketSize, 0)); + } + // Use the source packet type with the same size as DstPacketType, if it exists + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { + constexpr int DstPacketSize = unpacket_traits::size; + using SizedSrcPacketType = typename find_packet_by_size::type; + constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); + constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode); + return pcast(srcPacket(row, col, 0)); + } + // unpacket_traits::size == 2 * SrcPacketSize + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + return pcast(srcPacket(row, col, 0), + srcPacket(row, col, 1)); + } + // unpacket_traits::size == 4 * SrcPacketSize + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { + constexpr int SrcLoadMode = 
plain_enum_min(SrcPacketBytes, LoadMode); + return pcast(srcPacket(row, col, 0), srcPacket(row, col, 1), + srcPacket(row, col, 2), + srcPacket(row, col, 3)); + } + // unpacket_traits::size == 8 * SrcPacketSize + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index row, Index col) const { + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + return pcast( + srcPacket(row, col, 0), srcPacket(row, col, 1), srcPacket(row, col, 2), + srcPacket(row, col, 3), srcPacket(row, col, 4), srcPacket(row, col, 5), + srcPacket(row, col, 6), srcPacket(row, col, 7)); + } + + // packetSegment variants + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index row, Index col, Index begin, + Index count) const { + constexpr int DstPacketSize = unpacket_traits::size; + constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); + constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode); + return pcast(srcPacketSegment(row, col, begin, count, 0)); + } + // Use the source packet type with the same size as DstPacketType, if it exists + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index row, Index col, Index begin, + Index count) const { + constexpr int DstPacketSize = unpacket_traits::size; + using SizedSrcPacketType = typename find_packet_by_size::type; + constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); + constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode); + return pcast( + srcPacketSegment(row, col, begin, count, 0)); + } + // unpacket_traits::size == 2 * SrcPacketSize + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index row, Index col, Index begin, + Index count) const { + constexpr int NumPackets = 2; + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + PacketBlock packets = + srcPacketSegmentHelper(row, col, begin, count); + return 
pcast(packets.packet[0], packets.packet[1]); + } + // unpacket_traits::size == 4 * SrcPacketSize + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index row, Index col, Index begin, + Index count) const { + constexpr int NumPackets = 4; + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + PacketBlock packets = + srcPacketSegmentHelper(row, col, begin, count); + return pcast(packets.packet[0], packets.packet[1], packets.packet[2], + packets.packet[3]); + } + // unpacket_traits::size == 8 * SrcPacketSize + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index row, Index col, Index begin, + Index count) const { + constexpr int NumPackets = 8; + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + PacketBlock packets = + srcPacketSegmentHelper(row, col, begin, count); + return pcast(packets.packet[0], packets.packet[1], packets.packet[2], + packets.packet[3], packets.packet[4], packets.packet[5], + packets.packet[6], packets.packet[7]); + } + + // Analogous routines for linear access. 
+ template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { + constexpr int DstPacketSize = unpacket_traits::size; + constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); + constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode); + return pcast(srcPacketSegment(index, 0, DstPacketSize, 0)); + } + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { + constexpr int DstPacketSize = unpacket_traits::size; + using SizedSrcPacketType = typename find_packet_by_size::type; + constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); + constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode); + return pcast(srcPacket(index, 0)); + } + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + return pcast(srcPacket(index, 0), srcPacket(index, 1)); + } + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + return pcast(srcPacket(index, 0), srcPacket(index, 1), + srcPacket(index, 2), srcPacket(index, 3)); + } + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packet(Index index) const { + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + return pcast(srcPacket(index, 0), srcPacket(index, 1), + srcPacket(index, 2), srcPacket(index, 3), + srcPacket(index, 4), srcPacket(index, 5), + srcPacket(index, 6), srcPacket(index, 7)); + } + + // packetSegment variants + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index index, Index begin, Index count) const { + constexpr int DstPacketSize = unpacket_traits::size; + constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); + constexpr int SrcLoadMode = 
plain_enum_min(SrcBytesIncrement, LoadMode); + return pcast(srcPacketSegment(index, begin, count, 0)); + } + // Use the source packet type with the same size as DstPacketType, if it exists + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index index, Index begin, Index count) const { + constexpr int DstPacketSize = unpacket_traits::size; + using SizedSrcPacketType = typename find_packet_by_size::type; + constexpr int SrcBytesIncrement = DstPacketSize * sizeof(SrcType); + constexpr int SrcLoadMode = plain_enum_min(SrcBytesIncrement, LoadMode); + return pcast( + srcPacketSegment(index, begin, count, 0)); + } + // unpacket_traits::size == 2 * SrcPacketSize + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index index, Index begin, Index count) const { + constexpr int NumPackets = 2; + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + PacketBlock packets = + srcPacketSegmentHelper(index, begin, count); + return pcast(packets.packet[0], packets.packet[1]); + } + // unpacket_traits::size == 4 * SrcPacketSize + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index index, Index begin, Index count) const { + constexpr int NumPackets = 4; + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + PacketBlock packets = + srcPacketSegmentHelper(index, begin, count); + return pcast(packets.packet[0], packets.packet[1], packets.packet[2], + packets.packet[3]); + } + // unpacket_traits::size == 8 * SrcPacketSize + template = true> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DstPacketType packetSegment(Index index, Index begin, Index count) const { + constexpr int NumPackets = 8; + constexpr int SrcLoadMode = plain_enum_min(SrcPacketBytes, LoadMode); + PacketBlock packets = + srcPacketSegmentHelper(index, begin, count); + return pcast(packets.packet[0], packets.packet[1], packets.packet[2], + packets.packet[3], packets.packet[4], 
packets.packet[5], + packets.packet[6], packets.packet[7]); + } + + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { return m_rows; } + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { return m_cols; } + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_rows * m_cols; } + + protected: + const evaluator m_argImpl; + const variable_if_dynamic m_rows; + const variable_if_dynamic m_cols; +}; + +// -------------------- CwiseTernaryOp -------------------- + +// this is a ternary expression +template +struct evaluator> + : public ternary_evaluator> { + typedef CwiseTernaryOp XprType; + typedef ternary_evaluator> Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +template +struct ternary_evaluator, IndexBased, IndexBased> + : evaluator_base> { + typedef CwiseTernaryOp XprType; + + enum { + CoeffReadCost = int(evaluator::CoeffReadCost) + int(evaluator::CoeffReadCost) + + int(evaluator::CoeffReadCost) + int(functor_traits::Cost), + + Arg1Flags = evaluator::Flags, + Arg2Flags = evaluator::Flags, + Arg3Flags = evaluator::Flags, + SameType = is_same::value && + is_same::value, + StorageOrdersAgree = (int(Arg1Flags) & RowMajorBit) == (int(Arg2Flags) & RowMajorBit) && + (int(Arg1Flags) & RowMajorBit) == (int(Arg3Flags) & RowMajorBit), + Flags0 = (int(Arg1Flags) | int(Arg2Flags) | int(Arg3Flags)) & + (HereditaryBits | + (int(Arg1Flags) & int(Arg2Flags) & int(Arg3Flags) & + ((StorageOrdersAgree ? LinearAccessBit : 0) | + (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? 
PacketAccessBit : 0)))), + Flags = (Flags0 & ~RowMajorBit) | (Arg1Flags & RowMajorBit), + Alignment = plain_enum_min(plain_enum_min(evaluator::Alignment, evaluator::Alignment), + evaluator::Alignment) + }; + + EIGEN_DEVICE_FUNC explicit ternary_evaluator(const XprType& xpr) : m_d(xpr) { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + return m_d.func()(m_d.arg1Impl.coeff(row, col), m_d.arg2Impl.coeff(row, col), m_d.arg3Impl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_d.func()(m_d.arg1Impl.coeff(index), m_d.arg2Impl.coeff(index), m_d.arg3Impl.coeff(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + return m_d.func().packetOp(m_d.arg1Impl.template packet(row, col), + m_d.arg2Impl.template packet(row, col), + m_d.arg3Impl.template packet(row, col)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + return m_d.func().packetOp(m_d.arg1Impl.template packet(index), + m_d.arg2Impl.template packet(index), + m_d.arg3Impl.template packet(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + return m_d.func().packetOp(m_d.arg1Impl.template packetSegment(row, col, begin, count), + m_d.arg2Impl.template packetSegment(row, col, begin, count), + m_d.arg3Impl.template packetSegment(row, col, begin, count)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + return m_d.func().packetOp(m_d.arg1Impl.template packetSegment(index, begin, count), + m_d.arg2Impl.template packetSegment(index, begin, count), + 
m_d.arg3Impl.template packetSegment(index, begin, count)); + } + + protected: + // this helper permits to completely eliminate the functor if it is empty + struct Data { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Data(const XprType& xpr) + : op(xpr.functor()), arg1Impl(xpr.arg1()), arg2Impl(xpr.arg2()), arg3Impl(xpr.arg3()) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const TernaryOp& func() const { return op; } + TernaryOp op; + evaluator arg1Impl; + evaluator arg2Impl; + evaluator arg3Impl; + }; + + Data m_d; +}; + +template +struct scalar_boolean_select_spec { + using DummyTernaryOp = scalar_boolean_select_op; + using DummyArg3 = CwiseBinaryOp, CmpLhsType, CmpRhsType>; + using DummyXprType = CwiseTernaryOp; + + // only use the typed comparison if it is vectorized + static constexpr bool UseTyped = functor_traits>::PacketAccess; + using CondScalar = std::conditional_t; + + using TernaryOp = scalar_boolean_select_op; + using Arg3 = CwiseBinaryOp, CmpLhsType, CmpRhsType>; + using XprType = CwiseTernaryOp; + + using Base = ternary_evaluator; +}; + +// specialization for expressions like (a < b).select(c, d) to enable full vectorization +template +struct evaluator, Arg1, Arg2, + CwiseBinaryOp, CmpLhsType, CmpRhsType>>> + : public scalar_boolean_select_spec::Base { + using Helper = scalar_boolean_select_spec; + using Base = typename Helper::Base; + using DummyXprType = typename Helper::DummyXprType; + using Arg3 = typename Helper::Arg3; + using XprType = typename Helper::XprType; + + EIGEN_DEVICE_FUNC explicit evaluator(const DummyXprType& xpr) + : Base(XprType(xpr.arg1(), xpr.arg2(), Arg3(xpr.arg3().lhs(), xpr.arg3().rhs()))) {} +}; + +// -------------------- CwiseBinaryOp -------------------- + +// this is a binary expression +template +struct evaluator> : public binary_evaluator> { + typedef CwiseBinaryOp XprType; + typedef binary_evaluator> Base; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +template +struct 
binary_evaluator, IndexBased, IndexBased> + : evaluator_base> { + typedef CwiseBinaryOp XprType; + + enum { + CoeffReadCost = + int(evaluator::CoeffReadCost) + int(evaluator::CoeffReadCost) + int(functor_traits::Cost), + + LhsFlags = evaluator::Flags, + RhsFlags = evaluator::Flags, + SameType = is_same::value, + StorageOrdersAgree = (int(LhsFlags) & RowMajorBit) == (int(RhsFlags) & RowMajorBit), + Flags0 = (int(LhsFlags) | int(RhsFlags)) & + (HereditaryBits | + (int(LhsFlags) & int(RhsFlags) & + ((StorageOrdersAgree ? LinearAccessBit : 0) | + (functor_traits::PacketAccess && StorageOrdersAgree && SameType ? PacketAccessBit : 0)))), + Flags = (Flags0 & ~RowMajorBit) | (LhsFlags & RowMajorBit), + Alignment = plain_enum_min(evaluator::Alignment, evaluator::Alignment) + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit binary_evaluator(const XprType& xpr) : m_d(xpr) { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + return m_d.func()(m_d.lhsImpl.coeff(row, col), m_d.rhsImpl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_d.func()(m_d.lhsImpl.coeff(index), m_d.rhsImpl.coeff(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + return m_d.func().packetOp(m_d.lhsImpl.template packet(row, col), + m_d.rhsImpl.template packet(row, col)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + return m_d.func().packetOp(m_d.lhsImpl.template packet(index), + m_d.rhsImpl.template packet(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + return m_d.func().packetOp(m_d.lhsImpl.template 
packetSegment(row, col, begin, count), + m_d.rhsImpl.template packetSegment(row, col, begin, count)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + return m_d.func().packetOp(m_d.lhsImpl.template packetSegment(index, begin, count), + m_d.rhsImpl.template packetSegment(index, begin, count)); + } + + protected: + // this helper permits to completely eliminate the functor if it is empty + struct Data { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Data(const XprType& xpr) + : op(xpr.functor()), lhsImpl(xpr.lhs()), rhsImpl(xpr.rhs()) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const BinaryOp& func() const { return op; } + BinaryOp op; + evaluator lhsImpl; + evaluator rhsImpl; + }; + + Data m_d; +}; + +// -------------------- CwiseUnaryView -------------------- + +template +struct unary_evaluator, IndexBased> + : evaluator_base> { + typedef CwiseUnaryView XprType; + + enum { + CoeffReadCost = int(evaluator::CoeffReadCost) + int(functor_traits::Cost), + + Flags = (evaluator::Flags & (HereditaryBits | LinearAccessBit | DirectAccessBit)), + + Alignment = 0 // FIXME it is not very clear why alignment is necessarily lost... 
+ }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& op) : m_d(op) { + EIGEN_INTERNAL_CHECK_COST_VALUE(functor_traits::Cost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + return m_d.func()(m_d.argImpl.coeff(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_d.func()(m_d.argImpl.coeff(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { + return m_d.func()(m_d.argImpl.coeffRef(row, col)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + return m_d.func()(m_d.argImpl.coeffRef(index)); + } + + protected: + // this helper permits to completely eliminate the functor if it is empty + struct Data { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Data(const XprType& xpr) + : op(xpr.functor()), argImpl(xpr.nestedExpression()) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const UnaryOp& func() const { return op; } + UnaryOp op; + evaluator argImpl; + }; + + Data m_d; +}; + +// -------------------- Map -------------------- + +// FIXME perhaps the PlainObjectType could be provided by Derived::PlainObject ? 
+// but that might complicate template specialization +template +struct mapbase_evaluator; + +template +struct mapbase_evaluator : evaluator_base { + typedef Derived XprType; + typedef typename XprType::PointerType PointerType; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + IsRowMajor = XprType::RowsAtCompileTime, + ColsAtCompileTime = XprType::ColsAtCompileTime, + CoeffReadCost = NumTraits::ReadCost + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit mapbase_evaluator(const XprType& map) + : m_data(const_cast(map.data())), + m_innerStride(map.innerStride()), + m_outerStride(map.outerStride()) { + EIGEN_STATIC_ASSERT(check_implication((evaluator::Flags & PacketAccessBit) != 0, + inner_stride_at_compile_time::ret == 1), + PACKET_ACCESS_REQUIRES_TO_HAVE_INNER_STRIDE_FIXED_TO_1); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + return m_data[col * colStride() + row * rowStride()]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_data[index * m_innerStride.value()]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { + return m_data[col * colStride() + row * rowStride()]; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_data[index * m_innerStride.value()]; } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + PointerType ptr = m_data + row * rowStride() + col * colStride(); + return ploadt(ptr); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + return ploadt(m_data + index * m_innerStride.value()); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { + PointerType ptr = m_data + row * rowStride() + col * 
colStride(); + pstoret(ptr, x); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { + pstoret(m_data + index * m_innerStride.value(), x); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + PointerType ptr = m_data + row * rowStride() + col * colStride(); + return ploadtSegment(ptr, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + return ploadtSegment(m_data + index * m_innerStride.value(), begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index row, Index col, const PacketType& x, Index begin, + Index count) { + PointerType ptr = m_data + row * rowStride() + col * colStride(); + pstoretSegment(ptr, x, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin, + Index count) { + pstoretSegment(m_data + index * m_innerStride.value(), x, begin, count); + } + + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rowStride() const noexcept { + return XprType::IsRowMajor ? m_outerStride.value() : m_innerStride.value(); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index colStride() const noexcept { + return XprType::IsRowMajor ? m_innerStride.value() : m_outerStride.value(); + } + + PointerType m_data; + const variable_if_dynamic m_innerStride; + const variable_if_dynamic m_outerStride; +}; + +template +struct evaluator> + : public mapbase_evaluator, PlainObjectType> { + typedef Map XprType; + typedef typename XprType::Scalar Scalar; + // TODO: should check for smaller packet types once we can handle multi-sized packet types + typedef typename packet_traits::type PacketScalar; + + enum { + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 + ? 
int(PlainObjectType::InnerStrideAtCompileTime) + : int(StrideType::InnerStrideAtCompileTime), + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? int(PlainObjectType::OuterStrideAtCompileTime) + : int(StrideType::OuterStrideAtCompileTime), + HasNoInnerStride = InnerStrideAtCompileTime == 1, + HasNoOuterStride = StrideType::OuterStrideAtCompileTime == 0, + HasNoStride = HasNoInnerStride && HasNoOuterStride, + IsDynamicSize = PlainObjectType::SizeAtCompileTime == Dynamic, + + PacketAccessMask = bool(HasNoInnerStride) ? ~int(0) : ~int(PacketAccessBit), + LinearAccessMask = + bool(HasNoStride) || bool(PlainObjectType::IsVectorAtCompileTime) ? ~int(0) : ~int(LinearAccessBit), + Flags = int(evaluator::Flags) & (LinearAccessMask & PacketAccessMask), + + Alignment = int(MapOptions) & int(AlignedMask) + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& map) : mapbase_evaluator(map) {} +}; + +// -------------------- Ref -------------------- + +template +struct evaluator> + : public mapbase_evaluator, PlainObjectType> { + typedef Ref XprType; + + enum { + Flags = evaluator>::Flags, + Alignment = evaluator>::Alignment + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& ref) + : mapbase_evaluator(ref) {} +}; + +// -------------------- Block -------------------- + +template ::ret> +struct block_evaluator; + +template +struct evaluator> + : block_evaluator { + typedef Block XprType; + typedef typename XprType::Scalar Scalar; + // TODO: should check for smaller packet types once we can handle multi-sized packet types + typedef typename packet_traits::type PacketScalar; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime, + + ArgTypeIsRowMajor = (int(evaluator::Flags) & RowMajorBit) != 0, + IsRowMajor = 
(MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1) ? 1 + : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1) ? 0 + : ArgTypeIsRowMajor, + HasSameStorageOrderAsArgType = (IsRowMajor == ArgTypeIsRowMajor), + InnerSize = IsRowMajor ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + InnerStrideAtCompileTime = HasSameStorageOrderAsArgType ? int(inner_stride_at_compile_time::ret) + : int(outer_stride_at_compile_time::ret), + OuterStrideAtCompileTime = HasSameStorageOrderAsArgType ? int(outer_stride_at_compile_time::ret) + : int(inner_stride_at_compile_time::ret), + MaskPacketAccessBit = (InnerStrideAtCompileTime == 1 || HasSameStorageOrderAsArgType) ? PacketAccessBit : 0, + + FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1 || + (InnerPanel && (evaluator::Flags & LinearAccessBit))) + ? LinearAccessBit + : 0, + FlagsRowMajorBit = XprType::Flags & RowMajorBit, + Flags0 = evaluator::Flags & ((HereditaryBits & ~RowMajorBit) | DirectAccessBit | MaskPacketAccessBit), + Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit, + + PacketAlignment = unpacket_traits::alignment, + Alignment0 = (InnerPanel && (OuterStrideAtCompileTime != Dynamic) && (OuterStrideAtCompileTime != 0) && + (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % int(PacketAlignment)) == 0)) + ? 
int(PacketAlignment) + : 0, + Alignment = plain_enum_min(evaluator::Alignment, Alignment0) + }; + typedef block_evaluator block_evaluator_type; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& block) : block_evaluator_type(block) { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } +}; + +// no direct-access => dispatch to a unary evaluator +template +struct block_evaluator + : unary_evaluator> { + typedef Block XprType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit block_evaluator(const XprType& block) + : unary_evaluator(block) {} +}; + +template +struct unary_evaluator, IndexBased> + : evaluator_base> { + typedef Block XprType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& block) + : m_argImpl(block.nestedExpression()), + m_startRow(block.startRow()), + m_startCol(block.startCol()), + m_linear_offset(ForwardLinearAccess + ? (ArgType::IsRowMajor + ? block.startRow() * block.nestedExpression().cols() + block.startCol() + : block.startCol() * block.nestedExpression().rows() + block.startRow()) + : 0) {} + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + RowsAtCompileTime = XprType::RowsAtCompileTime, + ForwardLinearAccess = (InnerPanel || int(XprType::IsRowMajor) == int(ArgType::IsRowMajor)) && + bool(evaluator::Flags & LinearAccessBit) + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + return m_argImpl.coeff(m_startRow.value() + row, m_startCol.value() + col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return linear_coeff_impl(index, bool_constant()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { + return m_argImpl.coeffRef(m_startRow.value() + row, m_startCol.value() + col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + return linear_coeffRef_impl(index, 
bool_constant()); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + return m_argImpl.template packet(m_startRow.value() + row, m_startCol.value() + col); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + if (ForwardLinearAccess) + return m_argImpl.template packet(m_linear_offset.value() + index); + else + return packet(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { + return m_argImpl.template writePacket(m_startRow.value() + row, m_startCol.value() + col, x); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { + if (ForwardLinearAccess) + return m_argImpl.template writePacket(m_linear_offset.value() + index, x); + else + return writePacket(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0, + x); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + return m_argImpl.template packetSegment(m_startRow.value() + row, m_startCol.value() + col, + begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + if (ForwardLinearAccess) + return m_argImpl.template packetSegment(m_linear_offset.value() + index, begin, count); + else + return packetSegment(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? 
index : 0, + begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index row, Index col, const PacketType& x, Index begin, + Index count) { + return m_argImpl.template writePacketSegment(m_startRow.value() + row, + m_startCol.value() + col, x, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin, + Index count) { + if (ForwardLinearAccess) + return m_argImpl.template writePacketSegment(m_linear_offset.value() + index, x, begin, + count); + else + return writePacketSegment(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0, x, begin, count); + } + + protected: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType + linear_coeff_impl(Index index, internal::true_type /* ForwardLinearAccess */) const { + return m_argImpl.coeff(m_linear_offset.value() + index); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType + linear_coeff_impl(Index index, internal::false_type /* not ForwardLinearAccess */) const { + return coeff(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& linear_coeffRef_impl(Index index, + internal::true_type /* ForwardLinearAccess */) { + return m_argImpl.coeffRef(m_linear_offset.value() + index); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& linear_coeffRef_impl( + Index index, internal::false_type /* not ForwardLinearAccess */) { + return coeffRef(RowsAtCompileTime == 1 ? 0 : index, RowsAtCompileTime == 1 ? index : 0); + } + + evaluator m_argImpl; + const variable_if_dynamic m_startRow; + const variable_if_dynamic m_startCol; + const variable_if_dynamic m_linear_offset; +}; + +// TODO: This evaluator does not actually use the child evaluator; +// all action is via the data() as returned by the Block expression. 
+ +template +struct block_evaluator + : mapbase_evaluator, + typename Block::PlainObject> { + typedef Block XprType; + typedef typename XprType::Scalar Scalar; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit block_evaluator(const XprType& block) + : mapbase_evaluator(block) { + eigen_internal_assert((internal::is_constant_evaluated() || + (std::uintptr_t(block.data()) % plain_enum_max(1, evaluator::Alignment)) == 0) && + "data is not aligned"); + } +}; + +// -------------------- Replicate -------------------- + +template +struct unary_evaluator> + : evaluator_base> { + typedef Replicate XprType; + typedef typename XprType::CoeffReturnType CoeffReturnType; + enum { Factor = (RowFactor == Dynamic || ColFactor == Dynamic) ? Dynamic : RowFactor * ColFactor }; + typedef typename nested_eval::type ArgTypeNested; + typedef remove_all_t ArgTypeNestedCleaned; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + LinearAccessMask = XprType::IsVectorAtCompileTime ? LinearAccessBit : 0, + Flags = (evaluator::Flags & (HereditaryBits | LinearAccessMask) & ~RowMajorBit) | + (traits::Flags & RowMajorBit), + + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& replicate) + : m_arg(replicate.nestedExpression()), + m_argImpl(m_arg), + m_rows(replicate.nestedExpression().rows()), + m_cols(replicate.nestedExpression().cols()) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + // try to avoid using modulo; this is a pure optimization strategy + const Index actual_row = traits::RowsAtCompileTime == 1 ? 0 : RowFactor == 1 ? row : row % m_rows.value(); + const Index actual_col = traits::ColsAtCompileTime == 1 ? 0 : ColFactor == 1 ? 
col : col % m_cols.value(); + + return m_argImpl.coeff(actual_row, actual_col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + // try to avoid using modulo; this is a pure optimization strategy + const Index actual_index = traits::RowsAtCompileTime == 1 + ? (ColFactor == 1 ? index : index % m_cols.value()) + : (RowFactor == 1 ? index : index % m_rows.value()); + + return m_argImpl.coeff(actual_index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + const Index actual_row = traits::RowsAtCompileTime == 1 ? 0 : RowFactor == 1 ? row : row % m_rows.value(); + const Index actual_col = traits::ColsAtCompileTime == 1 ? 0 : ColFactor == 1 ? col : col % m_cols.value(); + + return m_argImpl.template packet(actual_row, actual_col); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + const Index actual_index = traits::RowsAtCompileTime == 1 + ? (ColFactor == 1 ? index : index % m_cols.value()) + : (RowFactor == 1 ? index : index % m_rows.value()); + + return m_argImpl.template packet(actual_index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + const Index actual_row = traits::RowsAtCompileTime == 1 ? 0 : RowFactor == 1 ? row : row % m_rows.value(); + const Index actual_col = traits::ColsAtCompileTime == 1 ? 0 : ColFactor == 1 ? col : col % m_cols.value(); + + return m_argImpl.template packetSegment(actual_row, actual_col, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + const Index actual_index = traits::RowsAtCompileTime == 1 + ? (ColFactor == 1 ? index : index % m_cols.value()) + : (RowFactor == 1 ? 
index : index % m_rows.value()); + + return m_argImpl.template packetSegment(actual_index, begin, count); + } + + protected: + const ArgTypeNested m_arg; + evaluator m_argImpl; + const variable_if_dynamic m_rows; + const variable_if_dynamic m_cols; +}; + +// -------------------- MatrixWrapper and ArrayWrapper -------------------- +// +// evaluator_wrapper_base is a common base class for the +// MatrixWrapper and ArrayWrapper evaluators. + +template +struct evaluator_wrapper_base : evaluator_base { + typedef remove_all_t ArgType; + enum { + CoeffReadCost = evaluator::CoeffReadCost, + Flags = evaluator::Flags, + Alignment = evaluator::Alignment + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator_wrapper_base(const ArgType& arg) : m_argImpl(arg) {} + + typedef typename ArgType::Scalar Scalar; + typedef typename ArgType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + return m_argImpl.coeff(row, col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { return m_argImpl.coeff(index); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { return m_argImpl.coeffRef(row, col); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { return m_argImpl.coeffRef(index); } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + return m_argImpl.template packet(row, col); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + return m_argImpl.template packet(index); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { + m_argImpl.template writePacket(row, col, x); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { + m_argImpl.template writePacket(index, x); + } + + template + 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + return m_argImpl.template packetSegment(row, col, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + return m_argImpl.template packetSegment(index, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index row, Index col, const PacketType& x, Index begin, + Index count) { + m_argImpl.template writePacketSegment(row, col, x, begin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin, + Index count) { + m_argImpl.template writePacketSegment(index, x, begin, count); + } + + protected: + evaluator m_argImpl; +}; + +template +struct unary_evaluator> : evaluator_wrapper_base> { + typedef MatrixWrapper XprType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& wrapper) + : evaluator_wrapper_base>(wrapper.nestedExpression()) {} +}; + +template +struct unary_evaluator> : evaluator_wrapper_base> { + typedef ArrayWrapper XprType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& wrapper) + : evaluator_wrapper_base>(wrapper.nestedExpression()) {} +}; + +// -------------------- Reverse -------------------- + +// defined in Reverse.h: +template +struct reverse_packet_cond; + +template +struct unary_evaluator> : evaluator_base> { + typedef Reverse XprType; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + enum { + IsRowMajor = XprType::IsRowMajor, + IsColMajor = !IsRowMajor, + ReverseRow = (Direction == Vertical) || (Direction == BothDirections), + ReverseCol = (Direction == Horizontal) || (Direction == BothDirections), + ReversePacket = (Direction == BothDirections) || ((Direction == Vertical) && IsColMajor) || + 
((Direction == Horizontal) && IsRowMajor), + + CoeffReadCost = evaluator::CoeffReadCost, + + // let's enable LinearAccess only with vectorization because of the product overhead + // FIXME enable DirectAccess with negative strides? + Flags0 = evaluator::Flags, + LinearAccess = + ((Direction == BothDirections) && (int(Flags0) & PacketAccessBit)) || + ((ReverseRow && XprType::ColsAtCompileTime == 1) || (ReverseCol && XprType::RowsAtCompileTime == 1)) + ? LinearAccessBit + : 0, + + Flags = int(Flags0) & (HereditaryBits | PacketAccessBit | LinearAccess), + + Alignment = 0 // FIXME in some rare cases, Alignment could be preserved, like a Vector4f. + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit unary_evaluator(const XprType& reverse) + : m_argImpl(reverse.nestedExpression()), + m_rows(ReverseRow ? reverse.nestedExpression().rows() : 1), + m_cols(ReverseCol ? reverse.nestedExpression().cols() : 1) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + return m_argImpl.coeff(ReverseRow ? m_rows.value() - row - 1 : row, ReverseCol ? m_cols.value() - col - 1 : col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_argImpl.coeff(m_rows.value() * m_cols.value() - index - 1); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { + return m_argImpl.coeffRef(ReverseRow ? m_rows.value() - row - 1 : row, ReverseCol ? m_cols.value() - col - 1 : col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + return m_argImpl.coeffRef(m_rows.value() * m_cols.value() - index - 1); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1; + static constexpr int OffsetCol = ReverseCol && IsRowMajor ? 
PacketSize : 1; + using reverse_packet = reverse_packet_cond; + + Index actualRow = ReverseRow ? m_rows.value() - row - OffsetRow : row; + Index actualCol = ReverseCol ? m_cols.value() - col - OffsetCol : col; + + return reverse_packet::run(m_argImpl.template packet(actualRow, actualCol)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + static constexpr int PacketSize = unpacket_traits::size; + + Index actualIndex = m_rows.value() * m_cols.value() - index - PacketSize; + + return preverse(m_argImpl.template packet(actualIndex)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index row, Index col, const PacketType& x) { + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1; + static constexpr int OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1; + using reverse_packet = reverse_packet_cond; + + Index actualRow = ReverseRow ? m_rows.value() - row - OffsetRow : row; + Index actualCol = ReverseCol ? m_cols.value() - col - OffsetCol : col; + + m_argImpl.template writePacket(actualRow, actualCol, reverse_packet::run(x)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacket(Index index, const PacketType& x) { + static constexpr int PacketSize = unpacket_traits::size; + + Index actualIndex = m_rows.value() * m_cols.value() - index - PacketSize; + + m_argImpl.template writePacket(actualIndex, preverse(x)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1; + static constexpr int OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1; + using reverse_packet = reverse_packet_cond; + + Index actualRow = ReverseRow ? 
m_rows.value() - row - OffsetRow : row; + Index actualCol = ReverseCol ? m_cols.value() - col - OffsetCol : col; + Index actualBegin = ReversePacket ? (PacketSize - count - begin) : begin; + + return reverse_packet::run( + m_argImpl.template packetSegment(actualRow, actualCol, actualBegin, count)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + static constexpr int PacketSize = unpacket_traits::size; + + Index actualIndex = m_rows.value() * m_cols.value() - index - PacketSize; + Index actualBegin = PacketSize - count - begin; + + return preverse(m_argImpl.template packetSegment(actualIndex, actualBegin, count)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index row, Index col, const PacketType& x, Index begin, + Index count) { + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1; + static constexpr int OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1; + using reverse_packet = reverse_packet_cond; + + Index actualRow = ReverseRow ? m_rows.value() - row - OffsetRow : row; + Index actualCol = ReverseCol ? m_cols.value() - col - OffsetCol : col; + Index actualBegin = ReversePacket ? 
(PacketSize - count - begin) : begin; + + m_argImpl.template writePacketSegment(actualRow, actualCol, reverse_packet::run(x), actualBegin, count); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void writePacketSegment(Index index, const PacketType& x, Index begin, + Index count) { + static constexpr int PacketSize = unpacket_traits::size; + + Index actualIndex = m_rows.value() * m_cols.value() - index - PacketSize; + Index actualBegin = PacketSize - count - begin; + + m_argImpl.template writePacketSegment(actualIndex, preverse(x), actualBegin, count); + } + + protected: + evaluator m_argImpl; + + // If we do not reverse rows, then we do not need to know the number of rows; same for columns + // Nonetheless, in this case it is important to set to 1 such that the coeff(index) method works fine for vectors. + const variable_if_dynamic m_rows; + const variable_if_dynamic m_cols; +}; + +// -------------------- Diagonal -------------------- + +template +struct evaluator> : evaluator_base> { + typedef Diagonal XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + + Flags = + (unsigned int)(evaluator::Flags & (HereditaryBits | DirectAccessBit) & ~RowMajorBit) | LinearAccessBit, + + Alignment = 0 + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& diagonal) + : m_argImpl(diagonal.nestedExpression()), m_index(diagonal.index()) {} + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index) const { + return m_argImpl.coeff(row + rowOffset(), row + colOffset()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + return m_argImpl.coeff(index + rowOffset(), index + colOffset()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index) { + return m_argImpl.coeffRef(row + rowOffset(), row + colOffset()); + } + + EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + return m_argImpl.coeffRef(index + rowOffset(), index + colOffset()); + } + + protected: + evaluator m_argImpl; + const variable_if_dynamicindex m_index; + + private: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rowOffset() const { + return m_index.value() > 0 ? 0 : -m_index.value(); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index colOffset() const { + return m_index.value() > 0 ? m_index.value() : 0; + } +}; + +//---------------------------------------------------------------------- +// deprecated code +//---------------------------------------------------------------------- + +// -------------------- EvalToTemp -------------------- + +// expression class for evaluating nested expression to a temporary + +template +class EvalToTemp; + +template +struct traits> : public traits {}; + +template +class EvalToTemp : public dense_xpr_base>::type { + public: + typedef typename dense_xpr_base::type Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(EvalToTemp) + + explicit EvalToTemp(const ArgType& arg) : m_arg(arg) {} + + const ArgType& arg() const { return m_arg; } + + constexpr Index rows() const noexcept { return m_arg.rows(); } + + constexpr Index cols() const noexcept { return m_arg.cols(); } + + private: + const ArgType& m_arg; +}; + +template +struct evaluator> : public evaluator { + typedef EvalToTemp XprType; + typedef typename ArgType::PlainObject PlainObject; + typedef evaluator Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : m_result(xpr.arg()) { + internal::construct_at(this, m_result); + } + + // This constructor is used when nesting an EvalTo evaluator in another evaluator + EIGEN_DEVICE_FUNC evaluator(const ArgType& arg) : m_result(arg) { internal::construct_at(this, m_result); } + + protected: + PlainObject m_result; +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_COREEVALUATORS_H diff --git 
a/o-voxel/third_party/eigen/Eigen/src/Core/CoreIterators.h b/o-voxel/third_party/eigen/Eigen/src/Core/CoreIterators.h new file mode 100644 index 0000000000000000000000000000000000000000..8350b5b9eebc4567c0ff69b9475bea10e4fd57a8 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/CoreIterators.h @@ -0,0 +1,141 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_COREITERATORS_H +#define EIGEN_COREITERATORS_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +/* This file contains the respective InnerIterator definition of the expressions defined in Eigen/Core + */ + +namespace internal { + +template +class inner_iterator_selector; + +} + +/** \class InnerIterator + * \brief An InnerIterator allows to loop over the element of any matrix expression. + * + * \warning To be used with care because an evaluator is constructed every time an InnerIterator iterator is + * constructed. + * + * TODO: add a usage example + */ +template +class InnerIterator { + protected: + typedef internal::inner_iterator_selector::Kind> IteratorType; + typedef internal::evaluator EvaluatorType; + typedef typename internal::traits::Scalar Scalar; + + public: + /** Construct an iterator over the \a outerId -th row or column of \a xpr */ + InnerIterator(const XprType &xpr, const Index &outerId) : m_eval(xpr), m_iter(m_eval, outerId, xpr.innerSize()) {} + + /// \returns the value of the current coefficient. + EIGEN_STRONG_INLINE Scalar value() const { return m_iter.value(); } + /** Increment the iterator \c *this to the next non-zero coefficient. + * Explicit zeros are not skipped over. 
To skip explicit zeros, see class SparseView + */ + EIGEN_STRONG_INLINE InnerIterator &operator++() { + m_iter.operator++(); + return *this; + } + EIGEN_STRONG_INLINE InnerIterator &operator+=(Index i) { + m_iter.operator+=(i); + return *this; + } + EIGEN_STRONG_INLINE InnerIterator operator+(Index i) { + InnerIterator result(*this); + result += i; + return result; + } + + /// \returns the column or row index of the current coefficient. + EIGEN_STRONG_INLINE Index index() const { return m_iter.index(); } + /// \returns the row index of the current coefficient. + EIGEN_STRONG_INLINE Index row() const { return m_iter.row(); } + /// \returns the column index of the current coefficient. + EIGEN_STRONG_INLINE Index col() const { return m_iter.col(); } + /// \returns \c true if the iterator \c *this still references a valid coefficient. + EIGEN_STRONG_INLINE operator bool() const { return m_iter; } + + protected: + EvaluatorType m_eval; + IteratorType m_iter; + + private: + // If you get here, then you're not using the right InnerIterator type, e.g.: + // SparseMatrix A; + // SparseMatrix::InnerIterator it(A,0); + template + InnerIterator(const EigenBase &, Index outer); +}; + +namespace internal { + +// Generic inner iterator implementation for dense objects +template +class inner_iterator_selector { + protected: + typedef evaluator EvaluatorType; + typedef typename traits::Scalar Scalar; + enum { IsRowMajor = (XprType::Flags & RowMajorBit) == RowMajorBit }; + + public: + EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, const Index &innerSize) + : m_eval(eval), m_inner(0), m_outer(outerId), m_end(innerSize) {} + + EIGEN_STRONG_INLINE Scalar value() const { + return (IsRowMajor) ? 
m_eval.coeff(m_outer, m_inner) : m_eval.coeff(m_inner, m_outer); + } + + EIGEN_STRONG_INLINE inner_iterator_selector &operator++() { + m_inner++; + return *this; + } + + EIGEN_STRONG_INLINE Index index() const { return m_inner; } + inline Index row() const { return IsRowMajor ? m_outer : index(); } + inline Index col() const { return IsRowMajor ? index() : m_outer; } + + EIGEN_STRONG_INLINE operator bool() const { return m_inner < m_end && m_inner >= 0; } + + protected: + const EvaluatorType &m_eval; + Index m_inner; + const Index m_outer; + const Index m_end; +}; + +// For iterator-based evaluator, inner-iterator is already implemented as +// evaluator<>::InnerIterator +template +class inner_iterator_selector : public evaluator::InnerIterator { + protected: + typedef typename evaluator::InnerIterator Base; + typedef evaluator EvaluatorType; + + public: + EIGEN_STRONG_INLINE inner_iterator_selector(const EvaluatorType &eval, const Index &outerId, + const Index & /*innerSize*/) + : Base(eval, outerId) {} +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_COREITERATORS_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/CwiseBinaryOp.h b/o-voxel/third_party/eigen/Eigen/src/Core/CwiseBinaryOp.h new file mode 100644 index 0000000000000000000000000000000000000000..a966afc283b85dc988c75fb73600759048e9327e --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/CwiseBinaryOp.h @@ -0,0 +1,166 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CWISE_BINARY_OP_H +#define EIGEN_CWISE_BINARY_OP_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { +template +struct traits> { + // we must not inherit from traits since it has + // the potential to cause problems with MSVC + typedef remove_all_t Ancestor; + typedef typename traits::XprKind XprKind; + enum { + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime + }; + + // even though we require Lhs and Rhs to have the same scalar type (see CwiseBinaryOp constructor), + // we still want to handle the case when the result type is different. + typedef typename result_of::type Scalar; + typedef typename cwise_promote_storage_type::StorageKind, typename traits::StorageKind, + BinaryOp>::ret StorageKind; + typedef typename promote_index_type::StorageIndex, typename traits::StorageIndex>::type + StorageIndex; + typedef typename Lhs::Nested LhsNested; + typedef typename Rhs::Nested RhsNested; + typedef std::remove_reference_t LhsNested_; + typedef std::remove_reference_t RhsNested_; + enum { + Flags = cwise_promote_storage_order::StorageKind, typename traits::StorageKind, + LhsNested_::Flags & RowMajorBit, RhsNested_::Flags & RowMajorBit>::value + }; +}; +} // end namespace internal + +template +class CwiseBinaryOpImpl; + +/** \class CwiseBinaryOp + * \ingroup Core_Module + * + * \brief Generic expression where a coefficient-wise binary operator is applied to two expressions + * + * \tparam BinaryOp template functor implementing the operator + * \tparam LhsType the type of the left-hand side + * \tparam RhsType the type of the right-hand side + * + * This class represents an expression where a coefficient-wise binary operator is applied to two expressions. 
+ * It is the return type of binary operators, by which we mean only those binary operators where + * both the left-hand side and the right-hand side are Eigen expressions. + * For example, the return type of matrix1+matrix2 is a CwiseBinaryOp. + * + * Most of the time, this is the only way that it is used, so you typically don't have to name + * CwiseBinaryOp types explicitly. + * + * \sa MatrixBase::binaryExpr(const MatrixBase &,const CustomBinaryOp &) const, class CwiseUnaryOp, class + * CwiseNullaryOp + */ +template +class CwiseBinaryOp : public CwiseBinaryOpImpl::StorageKind, + typename internal::traits::StorageKind, BinaryOp>::ret>, + internal::no_assignment_operator { + public: + typedef internal::remove_all_t Functor; + typedef internal::remove_all_t Lhs; + typedef internal::remove_all_t Rhs; + + typedef typename CwiseBinaryOpImpl< + BinaryOp, LhsType, RhsType, + typename internal::cwise_promote_storage_type::StorageKind, + typename internal::traits::StorageKind, BinaryOp>::ret>::Base + Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseBinaryOp) + + EIGEN_CHECK_BINARY_COMPATIBILIY(BinaryOp, typename Lhs::Scalar, typename Rhs::Scalar) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Lhs, Rhs) + + typedef typename internal::ref_selector::type LhsNested; + typedef typename internal::ref_selector::type RhsNested; + typedef std::remove_reference_t LhsNested_; + typedef std::remove_reference_t RhsNested_; + +#if EIGEN_COMP_MSVC + // Required for Visual Studio or the Copy constructor will probably not get inlined! 
+ EIGEN_STRONG_INLINE CwiseBinaryOp(const CwiseBinaryOp&) = default; +#endif + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CwiseBinaryOp(const Lhs& aLhs, const Rhs& aRhs, + const BinaryOp& func = BinaryOp()) + : m_lhs(aLhs), m_rhs(aRhs), m_functor(func) { + eigen_assert(aLhs.rows() == aRhs.rows() && aLhs.cols() == aRhs.cols()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const noexcept { + // return the fixed size type if available to enable compile time optimizations + return internal::traits>::RowsAtCompileTime == Dynamic ? m_rhs.rows() + : m_lhs.rows(); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept { + // return the fixed size type if available to enable compile time optimizations + return internal::traits>::ColsAtCompileTime == Dynamic ? m_rhs.cols() + : m_lhs.cols(); + } + + /** \returns the left hand side nested expression */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const LhsNested_& lhs() const { return m_lhs; } + /** \returns the right hand side nested expression */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const RhsNested_& rhs() const { return m_rhs; } + /** \returns the functor representing the binary operation */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const BinaryOp& functor() const { return m_functor; } + + protected: + LhsNested m_lhs; + RhsNested m_rhs; + const BinaryOp m_functor; +}; + +// Generic API dispatcher +template +class CwiseBinaryOpImpl : public internal::generic_xpr_base>::type { + public: + typedef typename internal::generic_xpr_base>::type Base; +}; + +/** replaces \c *this by \c *this - \a other. + * + * \returns a reference to \c *this + */ +template +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Derived& MatrixBase::operator-=(const MatrixBase& other) { + call_assignment(derived(), other.derived(), internal::sub_assign_op()); + return derived(); +} + +/** replaces \c *this by \c *this + \a other. 
+ * + * \returns a reference to \c *this + */ +template +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Derived& MatrixBase::operator+=(const MatrixBase& other) { + call_assignment(derived(), other.derived(), internal::add_assign_op()); + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_CWISE_BINARY_OP_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/CwiseNullaryOp.h b/o-voxel/third_party/eigen/Eigen/src/Core/CwiseNullaryOp.h new file mode 100644 index 0000000000000000000000000000000000000000..f744d4d62d9e5d7d589b835e03b6556d3c6ae7c9 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/CwiseNullaryOp.h @@ -0,0 +1,975 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CWISE_NULLARY_OP_H +#define EIGEN_CWISE_NULLARY_OP_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { +template +struct traits > : traits { + enum { Flags = traits::Flags & RowMajorBit }; +}; + +} // namespace internal + +/** \class CwiseNullaryOp + * \ingroup Core_Module + * + * \brief Generic expression of a matrix where all coefficients are defined by a functor + * + * \tparam NullaryOp template functor implementing the operator + * \tparam PlainObjectType the underlying plain matrix/array type + * + * This class represents an expression of a generic nullary operator. + * It is the return type of the Ones(), Zero(), Constant(), Identity() and Random() methods, + * and most of the time this is the only way it is used. + * + * However, if you want to write a function returning such an expression, you + * will need to use this class. 
+ * + * The functor NullaryOp must expose one of the following method: + + +
\c operator()() if the procedural generation does not depend on the coefficient entries + (e.g., random numbers)
\c operator()(Index i)if the procedural generation makes + sense for vectors only and that it depends on the coefficient index \c i (e.g., linspace)
\c + operator()(Index i,Index j)if the procedural generation depends on the matrix coordinates \c i, \c j (e.g., + to generate a checkerboard with 0 and 1)
+ * It is also possible to expose the last two operators if the generation makes sense for matrices but can be optimized + for vectors. + * + * See DenseBase::NullaryExpr(Index,const CustomNullaryOp&) for an example binding + * C++11 random number generators. + * + * A nullary expression can also be used to implement custom sophisticated matrix manipulations + * that cannot be covered by the existing set of natively supported matrix manipulations. + * See this \ref TopicCustomizing_NullaryExpr "page" for some examples and additional explanations + * on the behavior of CwiseNullaryOp. + * + * \sa class CwiseUnaryOp, class CwiseBinaryOp, DenseBase::NullaryExpr + */ +template +class CwiseNullaryOp : public internal::dense_xpr_base >::type, + internal::no_assignment_operator { + public: + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(CwiseNullaryOp) + + EIGEN_DEVICE_FUNC CwiseNullaryOp(Index rows, Index cols, const NullaryOp& func = NullaryOp()) + : m_rows(rows), m_cols(cols), m_functor(func) { + eigen_assert(rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) && cols >= 0 && + (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols)); + } + EIGEN_DEVICE_FUNC CwiseNullaryOp(Index size, const NullaryOp& func = NullaryOp()) + : CwiseNullaryOp(RowsAtCompileTime == 1 ? 1 : size, RowsAtCompileTime == 1 ? 
size : 1, func) { + EIGEN_STATIC_ASSERT(CwiseNullaryOp::IsVectorAtCompileTime, YOU_TRIED_CALLING_A_VECTOR_METHOD_ON_A_MATRIX); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows.value(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols.value(); } + + /** \returns the functor representing the nullary operation */ + EIGEN_DEVICE_FUNC const NullaryOp& functor() const { return m_functor; } + + protected: + const internal::variable_if_dynamic m_rows; + const internal::variable_if_dynamic m_cols; + const NullaryOp m_functor; +}; + +/** \returns an expression of a matrix defined by a custom functor \a func + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so NullaryExpr(const CustomNullaryOp&) should be used + * instead. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +#ifndef EIGEN_PARSED_BY_DOXYGEN + const CwiseNullaryOp::PlainObject> +#else + const CwiseNullaryOp +#endif + DenseBase::NullaryExpr(Index rows, Index cols, const CustomNullaryOp& func) { + return CwiseNullaryOp(rows, cols, func); +} + +/** \returns an expression of a matrix defined by a custom functor \a func + * + * The parameter \a size is the size of the returned vector. + * Must be compatible with this MatrixBase type. + * + * \only_for_vectors + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so NullaryExpr(const CustomNullaryOp&) should be used + * instead. + * + * The template parameter \a CustomNullaryOp is the type of the functor. 
+ * + * Here is an example with C++11 random generators: \include random_cpp11.cpp + * Output: \verbinclude random_cpp11.out + * + * \sa class CwiseNullaryOp + */ +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +#ifndef EIGEN_PARSED_BY_DOXYGEN + const CwiseNullaryOp::PlainObject> +#else + const CwiseNullaryOp +#endif + DenseBase::NullaryExpr(Index size, const CustomNullaryOp& func) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + if (RowsAtCompileTime == 1) + return CwiseNullaryOp(1, size, func); + else + return CwiseNullaryOp(size, 1, func); +} + +/** \returns an expression of a matrix defined by a custom functor \a func + * + * This variant is only for fixed-size DenseBase types. For dynamic-size types, you + * need to use the variants taking size arguments. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE +#ifndef EIGEN_PARSED_BY_DOXYGEN + const CwiseNullaryOp::PlainObject> +#else + const CwiseNullaryOp +#endif + DenseBase::NullaryExpr(const CustomNullaryOp& func) { + return CwiseNullaryOp(RowsAtCompileTime, ColsAtCompileTime, func); +} + +/** \returns an expression of a constant matrix of value \a value + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this DenseBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Constant(const Scalar&) should be used + * instead. + * + * The template parameter \a CustomNullaryOp is the type of the functor. 
+ * + * \sa class CwiseNullaryOp + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Constant(Index rows, Index cols, const Scalar& value) { + return DenseBase::NullaryExpr(rows, cols, internal::scalar_constant_op(value)); +} + +/** \returns an expression of a constant matrix of value \a value + * + * The parameter \a size is the size of the returned vector. + * Must be compatible with this DenseBase type. + * + * \only_for_vectors + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so Constant(const Scalar&) should be used + * instead. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Constant(Index size, const Scalar& value) { + return DenseBase::NullaryExpr(size, internal::scalar_constant_op(value)); +} + +/** \returns an expression of a constant matrix of value \a value + * + * This variant is only for fixed-size DenseBase types. For dynamic-size types, you + * need to use the variants taking size arguments. + * + * The template parameter \a CustomNullaryOp is the type of the functor. + * + * \sa class CwiseNullaryOp + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType +DenseBase::Constant(const Scalar& value) { + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + return DenseBase::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, + internal::scalar_constant_op(value)); +} + +/** \deprecated because of accuracy loss. 
In Eigen 3.3, it is an alias for LinSpaced(Index,const Scalar&,const Scalar&) + * + * \only_for_vectors + * + * Example: \include DenseBase_LinSpaced_seq_deprecated.cpp + * Output: \verbinclude DenseBase_LinSpaced_seq_deprecated.out + * + * \sa LinSpaced(Index,const Scalar&, const Scalar&), setLinSpaced(Index,const Scalar&,const Scalar&) + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +DenseBase::LinSpaced(Sequential_t, Index size, const Scalar& low, const Scalar& high) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return DenseBase::NullaryExpr(size, internal::linspaced_op(low, high, size)); +} + +/** \deprecated because of accuracy loss. In Eigen 3.3, it is an alias for LinSpaced(const Scalar&,const Scalar&) + * + * \sa LinSpaced(const Scalar&, const Scalar&) + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +DenseBase::LinSpaced(Sequential_t, const Scalar& low, const Scalar& high) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, + internal::linspaced_op(low, high, Derived::SizeAtCompileTime)); +} + +/** + * \brief Sets a linearly spaced vector. + * + * The function generates 'size' equally spaced values in the closed interval [low,high]. + * When size is set to 1, a vector of length 1 containing 'high' is returned. + * + * \only_for_vectors + * + * Example: \include DenseBase_LinSpaced.cpp + * Output: \verbinclude DenseBase_LinSpaced.out + * + * For integer scalar types, an even spacing is possible if and only if the length of the range, + * i.e., \c high-low is a scalar multiple of \c size-1, or if \c size is a scalar multiple of the + * number of values \c high-low+1 (meaning each value can be repeated the same number of time). 
+ * If one of these two conditions is not satisfied, then \c high is lowered to the largest value + * satisfying one of these constraints. + * Here are some examples: + * + * Example: \include DenseBase_LinSpacedInt.cpp + * Output: \verbinclude DenseBase_LinSpacedInt.out + * + * \sa setLinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +DenseBase::LinSpaced(Index size, const Scalar& low, const Scalar& high) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return DenseBase::NullaryExpr(size, internal::linspaced_op(low, high, size)); +} + +/** + * \copydoc DenseBase::LinSpaced(Index, const DenseBase::Scalar&, const DenseBase::Scalar&) + * Special version for fixed size types which does not require the size parameter. + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessLinSpacedReturnType +DenseBase::LinSpaced(const Scalar& low, const Scalar& high) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, + internal::linspaced_op(low, high, Derived::SizeAtCompileTime)); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessEqualSpacedReturnType +DenseBase::EqualSpaced(Index size, const Scalar& low, const Scalar& step) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return DenseBase::NullaryExpr(size, internal::equalspaced_op(low, step)); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::RandomAccessEqualSpacedReturnType +DenseBase::EqualSpaced(const Scalar& low, const Scalar& step) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return DenseBase::NullaryExpr(Derived::SizeAtCompileTime, internal::equalspaced_op(low, step)); +} + +/** \returns true if all coefficients in this matrix are approximately equal to \a val, to within precision \a prec */ +template 
+EIGEN_DEVICE_FUNC bool DenseBase::isApproxToConstant(const Scalar& val, const RealScalar& prec) const { + typename internal::nested_eval::type self(derived()); + for (Index j = 0; j < cols(); ++j) + for (Index i = 0; i < rows(); ++i) + if (!internal::isApprox(self.coeff(i, j), val, prec)) return false; + return true; +} + +/** This is just an alias for isApproxToConstant(). + * + * \returns true if all coefficients in this matrix are approximately equal to \a value, to within precision \a prec */ +template +EIGEN_DEVICE_FUNC bool DenseBase::isConstant(const Scalar& val, const RealScalar& prec) const { + return isApproxToConstant(val, prec); +} + +/** Alias for setConstant(): sets all coefficients in this expression to \a val. + * + * \sa setConstant(), Constant(), class CwiseNullaryOp + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void DenseBase::fill(const Scalar& val) { + setConstant(val); +} + +/** Sets all coefficients in this expression to value \a val. + * + * \sa fill(), setConstant(Index,const Scalar&), setConstant(Index,Index,const Scalar&), setZero(), setOnes(), + * Constant(), class CwiseNullaryOp, setZero(), setOnes() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setConstant(const Scalar& val) { + internal::eigen_fill_impl::run(derived(), val); + return derived(); +} + +/** Resizes to the given \a size, and sets all coefficients in this expression to the given value \a val. 
+ * + * \only_for_vectors + * + * Example: \include Matrix_setConstant_int.cpp + * Output: \verbinclude Matrix_setConstant_int.out + * + * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,Index,const Scalar&), class CwiseNullaryOp, + * MatrixBase::Constant(const Scalar&) + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setConstant(Index size, const Scalar& val) { + resize(size); + return setConstant(val); +} + +/** Resizes to the given size, and sets all coefficients in this expression to the given value \a val. + * + * \param rows the new number of rows + * \param cols the new number of columns + * \param val the value to which all coefficients are set + * + * Example: \include Matrix_setConstant_int_int.cpp + * Output: \verbinclude Matrix_setConstant_int_int.out + * + * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, + * MatrixBase::Constant(const Scalar&) + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setConstant(Index rows, Index cols, + const Scalar& val) { + resize(rows, cols); + return setConstant(val); +} + +/** Resizes to the given size, changing only the number of columns, and sets all + * coefficients in this expression to the given value \a val. For the parameter + * of type NoChange_t, just pass the special value \c NoChange. + * + * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, + * MatrixBase::Constant(const Scalar&) + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setConstant(NoChange_t, Index cols, + const Scalar& val) { + return setConstant(rows(), cols, val); +} + +/** Resizes to the given size, changing only the number of rows, and sets all + * coefficients in this expression to the given value \a val. For the parameter + * of type NoChange_t, just pass the special value \c NoChange. 
+ * + * \sa MatrixBase::setConstant(const Scalar&), setConstant(Index,const Scalar&), class CwiseNullaryOp, + * MatrixBase::Constant(const Scalar&) + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setConstant(Index rows, NoChange_t, + const Scalar& val) { + return setConstant(rows, cols(), val); +} + +/** + * \brief Sets a linearly spaced vector. + * + * The function generates 'size' equally spaced values in the closed interval [low,high]. + * When size is set to 1, a vector of length 1 containing 'high' is returned. + * + * \only_for_vectors + * + * Example: \include DenseBase_setLinSpaced.cpp + * Output: \verbinclude DenseBase_setLinSpaced.out + * + * For integer scalar types, do not miss the explanations on the definition + * of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink. + * + * \sa LinSpaced(Index,const Scalar&,const Scalar&), CwiseNullaryOp + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(Index newSize, const Scalar& low, + const Scalar& high) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return derived() = Derived::NullaryExpr(newSize, internal::linspaced_op(low, high, newSize)); +} + +/** + * \brief Sets a linearly spaced vector. + * + * The function fills \c *this with equally spaced values in the closed interval [low,high]. + * When size is set to 1, a vector of length 1 containing 'high' is returned. + * + * \only_for_vectors + * + * For integer scalar types, do not miss the explanations on the definition + * of \link LinSpaced(Index,const Scalar&,const Scalar&) even spacing \endlink. 
+ * + * \sa LinSpaced(Index,const Scalar&,const Scalar&), setLinSpaced(Index, const Scalar&, const Scalar&), CwiseNullaryOp + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setLinSpaced(const Scalar& low, const Scalar& high) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return setLinSpaced(size(), low, high); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setEqualSpaced(Index newSize, const Scalar& low, + const Scalar& step) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return derived() = Derived::NullaryExpr(newSize, internal::equalspaced_op(low, step)); +} +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setEqualSpaced(const Scalar& low, + const Scalar& step) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return setEqualSpaced(size(), low, step); +} + +// zero: + +/** \returns an expression of a zero matrix. + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Zero() should be used + * instead. + * + * Example: \include MatrixBase_zero_int_int.cpp + * Output: \verbinclude MatrixBase_zero_int_int.out + * + * \sa Zero(), Zero(Index) + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ZeroReturnType DenseBase::Zero( + Index rows, Index cols) { + return ZeroReturnType(rows, cols); +} + +/** \returns an expression of a zero vector. + * + * The parameter \a size is the size of the returned vector. + * Must be compatible with this MatrixBase type. + * + * \only_for_vectors + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so Zero() should be used + * instead. 
+ * + * Example: \include MatrixBase_zero_int.cpp + * Output: \verbinclude MatrixBase_zero_int.out + * + * \sa Zero(), Zero(Index,Index) + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ZeroReturnType DenseBase::Zero( + Index size) { + return ZeroReturnType(size); +} + +/** \returns an expression of a fixed-size zero matrix or vector. + * + * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you + * need to use the variants taking size arguments. + * + * Example: \include MatrixBase_zero.cpp + * Output: \verbinclude MatrixBase_zero.out + * + * \sa Zero(Index), Zero(Index,Index) + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ZeroReturnType DenseBase::Zero() { + return ZeroReturnType(RowsAtCompileTime, ColsAtCompileTime); +} + +/** \returns true if *this is approximately equal to the zero matrix, + * within the precision given by \a prec. + * + * Example: \include MatrixBase_isZero.cpp + * Output: \verbinclude MatrixBase_isZero.out + * + * \sa class CwiseNullaryOp, Zero() + */ +template +EIGEN_DEVICE_FUNC bool DenseBase::isZero(const RealScalar& prec) const { + typename internal::nested_eval::type self(derived()); + for (Index j = 0; j < cols(); ++j) + for (Index i = 0; i < rows(); ++i) + if (!internal::isMuchSmallerThan(self.coeff(i, j), static_cast(1), prec)) return false; + return true; +} + +/** Sets all coefficients in this expression to zero. + * + * Example: \include MatrixBase_setZero.cpp + * Output: \verbinclude MatrixBase_setZero.out + * + * \sa class CwiseNullaryOp, Zero() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setZero() { + internal::eigen_zero_impl::run(derived()); + return derived(); +} + +/** Resizes to the given \a size, and sets all coefficients in this expression to zero. 
+ * + * \only_for_vectors + * + * Example: \include Matrix_setZero_int.cpp + * Output: \verbinclude Matrix_setZero_int.out + * + * \sa DenseBase::setZero(), setZero(Index,Index), class CwiseNullaryOp, DenseBase::Zero() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setZero(Index newSize) { + resize(newSize); + return setZero(); +} + +/** Resizes to the given size, and sets all coefficients in this expression to zero. + * + * \param rows the new number of rows + * \param cols the new number of columns + * + * Example: \include Matrix_setZero_int_int.cpp + * Output: \verbinclude Matrix_setZero_int_int.out + * + * \sa DenseBase::setZero(), setZero(Index), class CwiseNullaryOp, DenseBase::Zero() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setZero(Index rows, Index cols) { + resize(rows, cols); + return setZero(); +} + +/** Resizes to the given size, changing only the number of columns, and sets all + * coefficients in this expression to zero. For the parameter of type NoChange_t, + * just pass the special value \c NoChange. + * + * \sa DenseBase::setZero(), setZero(Index), setZero(Index, Index), setZero(Index, NoChange_t), class CwiseNullaryOp, + * DenseBase::Zero() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setZero(NoChange_t, Index cols) { + return setZero(rows(), cols); +} + +/** Resizes to the given size, changing only the number of rows, and sets all + * coefficients in this expression to zero. For the parameter of type NoChange_t, + * just pass the special value \c NoChange. + * + * \sa DenseBase::setZero(), setZero(Index), setZero(Index, Index), setZero(NoChange_t, Index), class CwiseNullaryOp, + * DenseBase::Zero() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setZero(Index rows, NoChange_t) { + return setZero(rows, cols()); +} + +// ones: + +/** \returns an expression of a matrix where all coefficients equal one. 
+ * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Ones() should be used + * instead. + * + * Example: \include MatrixBase_ones_int_int.cpp + * Output: \verbinclude MatrixBase_ones_int_int.out + * + * \sa Ones(), Ones(Index), isOnes(), class Ones + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Ones( + Index rows, Index cols) { + return Constant(rows, cols, Scalar(1)); +} + +/** \returns an expression of a vector where all coefficients equal one. + * + * The parameter \a newSize is the size of the returned vector. + * Must be compatible with this MatrixBase type. + * + * \only_for_vectors + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so Ones() should be used + * instead. + * + * Example: \include MatrixBase_ones_int.cpp + * Output: \verbinclude MatrixBase_ones_int.out + * + * \sa Ones(), Ones(Index,Index), isOnes(), class Ones + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Ones( + Index newSize) { + return Constant(newSize, Scalar(1)); +} + +/** \returns an expression of a fixed-size matrix or vector where all coefficients equal one. + * + * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you + * need to use the variants taking size arguments. 
+ * + * Example: \include MatrixBase_ones.cpp + * Output: \verbinclude MatrixBase_ones.out + * + * \sa Ones(Index), Ones(Index,Index), isOnes(), class Ones + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstantReturnType DenseBase::Ones() { + return Constant(Scalar(1)); +} + +/** \returns true if *this is approximately equal to the matrix where all coefficients + * are equal to 1, within the precision given by \a prec. + * + * Example: \include MatrixBase_isOnes.cpp + * Output: \verbinclude MatrixBase_isOnes.out + * + * \sa class CwiseNullaryOp, Ones() + */ +template +EIGEN_DEVICE_FUNC bool DenseBase::isOnes(const RealScalar& prec) const { + return isApproxToConstant(Scalar(1), prec); +} + +/** Sets all coefficients in this expression to one. + * + * Example: \include MatrixBase_setOnes.cpp + * Output: \verbinclude MatrixBase_setOnes.out + * + * \sa class CwiseNullaryOp, Ones() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::setOnes() { + return setConstant(Scalar(1)); +} + +/** Resizes to the given \a newSize, and sets all coefficients in this expression to one. + * + * \only_for_vectors + * + * Example: \include Matrix_setOnes_int.cpp + * Output: \verbinclude Matrix_setOnes_int.out + * + * \sa MatrixBase::setOnes(), setOnes(Index,Index), class CwiseNullaryOp, MatrixBase::Ones() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setOnes(Index newSize) { + resize(newSize); + return setConstant(Scalar(1)); +} + +/** Resizes to the given size, and sets all coefficients in this expression to one. 
+ * + * \param rows the new number of rows + * \param cols the new number of columns + * + * Example: \include Matrix_setOnes_int_int.cpp + * Output: \verbinclude Matrix_setOnes_int_int.out + * + * \sa MatrixBase::setOnes(), setOnes(Index), class CwiseNullaryOp, MatrixBase::Ones() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setOnes(Index rows, Index cols) { + resize(rows, cols); + return setConstant(Scalar(1)); +} + +/** Resizes to the given size, changing only the number of rows, and sets all + * coefficients in this expression to one. For the parameter of type NoChange_t, + * just pass the special value \c NoChange. + * + * \sa MatrixBase::setOnes(), setOnes(Index), setOnes(Index, Index), setOnes(NoChange_t, Index), class CwiseNullaryOp, + * MatrixBase::Ones() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setOnes(Index rows, NoChange_t) { + return setOnes(rows, cols()); +} + +/** Resizes to the given size, changing only the number of columns, and sets all + * coefficients in this expression to one. For the parameter of type NoChange_t, + * just pass the special value \c NoChange. + * + * \sa MatrixBase::setOnes(), setOnes(Index), setOnes(Index, Index), setOnes(Index, NoChange_t) class CwiseNullaryOp, + * MatrixBase::Ones() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& PlainObjectBase::setOnes(NoChange_t, Index cols) { + return setOnes(rows(), cols); +} + +// Identity: + +/** \returns an expression of the identity matrix (not necessarily square). + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Identity() should be used + * instead. 
+ * + * Example: \include MatrixBase_identity_int_int.cpp + * Output: \verbinclude MatrixBase_identity_int_int.out + * + * \sa Identity(), setIdentity(), isIdentity() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::IdentityReturnType +MatrixBase::Identity(Index rows, Index cols) { + return DenseBase::NullaryExpr(rows, cols, internal::scalar_identity_op()); +} + +/** \returns an expression of the identity matrix (not necessarily square). + * + * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you + * need to use the variant taking size arguments. + * + * Example: \include MatrixBase_identity.cpp + * Output: \verbinclude MatrixBase_identity.out + * + * \sa Identity(Index,Index), setIdentity(), isIdentity() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::IdentityReturnType +MatrixBase::Identity() { + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + return MatrixBase::NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_identity_op()); +} + +/** \returns true if *this is approximately equal to the identity matrix + * (not necessarily square), + * within the precision given by \a prec. 
+ * + * Example: \include MatrixBase_isIdentity.cpp + * Output: \verbinclude MatrixBase_isIdentity.out + * + * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), setIdentity() + */ +template +bool MatrixBase::isIdentity(const RealScalar& prec) const { + typename internal::nested_eval::type self(derived()); + for (Index j = 0; j < cols(); ++j) { + for (Index i = 0; i < rows(); ++i) { + if (i == j) { + if (!internal::isApprox(self.coeff(i, j), static_cast(1), prec)) return false; + } else { + if (!internal::isMuchSmallerThan(self.coeff(i, j), static_cast(1), prec)) return false; + } + } + } + return true; +} + +namespace internal { + +template = 16)> +struct setIdentity_impl { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Derived& run(Derived& m) { + return m = Derived::Identity(m.rows(), m.cols()); + } +}; + +template +struct setIdentity_impl { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Derived& run(Derived& m) { + m.setZero(); + const Index size = numext::mini(m.rows(), m.cols()); + for (Index i = 0; i < size; ++i) m.coeffRef(i, i) = typename Derived::Scalar(1); + return m; + } +}; + +} // end namespace internal + +/** Writes the identity expression (not necessarily square) into *this. + * + * Example: \include MatrixBase_setIdentity.cpp + * Output: \verbinclude MatrixBase_setIdentity.out + * + * \sa class CwiseNullaryOp, Identity(), Identity(Index,Index), isIdentity() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity() { + return internal::setIdentity_impl::run(derived()); +} + +/** \brief Resizes to the given size, and writes the identity expression (not necessarily square) into *this. 
+ * + * \param rows the new number of rows + * \param cols the new number of columns + * + * Example: \include Matrix_setIdentity_int_int.cpp + * Output: \verbinclude Matrix_setIdentity_int_int.out + * + * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Identity() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::setIdentity(Index rows, Index cols) { + derived().resize(rows, cols); + return setIdentity(); +} + +/** \returns an expression of the i-th unit (basis) vector. + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::Unit( + Index newSize, Index i) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return BasisReturnType(SquareMatrixType::Identity(newSize, newSize), i); +} + +/** \returns an expression of the i-th unit (basis) vector. + * + * \only_for_vectors + * + * This variant is for fixed-size vector only. 
+ * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::UnitX(), MatrixBase::UnitY(), MatrixBase::UnitZ(), MatrixBase::UnitW() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::Unit( + Index i) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + return BasisReturnType(SquareMatrixType::Identity(), i); +} + +/** \returns an expression of the X axis unit vector (1{,0}^*) + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), + * MatrixBase::UnitW() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitX() { + return Derived::Unit(0); +} + +/** \returns an expression of the Y axis unit vector (0,1{,0}^*) + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), + * MatrixBase::UnitW() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitY() { + return Derived::Unit(1); +} + +/** \returns an expression of the Z axis unit vector (0,0,1{,0}^*) + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), + * MatrixBase::UnitW() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitZ() { + return Derived::Unit(2); +} + +/** \returns an expression of the W axis unit vector (0,0,0,1) + * + * \only_for_vectors + * + * \sa MatrixBase::Unit(Index,Index), MatrixBase::Unit(Index), MatrixBase::UnitY(), MatrixBase::UnitZ(), + * MatrixBase::UnitW() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::BasisReturnType MatrixBase::UnitW() { + return Derived::Unit(3); +} + +/** \brief Set the coefficients of \c *this to the i-th unit (basis) vector + * + * \param i index of the unique coefficient to be 
set to 1 + * + * \only_for_vectors + * + * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Unit(Index,Index) + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::setUnit(Index i) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); + eigen_assert(i < size()); + derived().setZero(); + derived().coeffRef(i) = Scalar(1); + return derived(); +} + +/** \brief Resizes to the given \a newSize, and writes the i-th unit (basis) vector into *this. + * + * \param newSize the new size of the vector + * \param i index of the unique coefficient to be set to 1 + * + * \only_for_vectors + * + * \sa MatrixBase::setIdentity(), class CwiseNullaryOp, MatrixBase::Unit(Index,Index) + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& MatrixBase::setUnit(Index newSize, Index i) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); + eigen_assert(i < newSize); + derived().resize(newSize); + return setUnit(i); +} + +} // end namespace Eigen + +#endif // EIGEN_CWISE_NULLARY_OP_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/CwiseTernaryOp.h b/o-voxel/third_party/eigen/Eigen/src/Core/CwiseTernaryOp.h new file mode 100644 index 0000000000000000000000000000000000000000..88a1a9598fdd383b3b6a4491eaee7b64086ca99a --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/CwiseTernaryOp.h @@ -0,0 +1,171 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2016 Eugene Brevdo +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_CWISE_TERNARY_OP_H +#define EIGEN_CWISE_TERNARY_OP_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { +template +struct traits> { + // we must not inherit from traits since it has + // the potential to cause problems with MSVC + typedef remove_all_t Ancestor; + typedef typename traits::XprKind XprKind; + enum { + RowsAtCompileTime = traits::RowsAtCompileTime, + ColsAtCompileTime = traits::ColsAtCompileTime, + MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = traits::MaxColsAtCompileTime + }; + + // even though we require Arg1, Arg2, and Arg3 to have the same scalar type + // (see CwiseTernaryOp constructor), + // we still want to handle the case when the result type is different. + typedef typename result_of::type Scalar; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; + + typedef typename Arg1::Nested Arg1Nested; + typedef typename Arg2::Nested Arg2Nested; + typedef typename Arg3::Nested Arg3Nested; + typedef std::remove_reference_t Arg1Nested_; + typedef std::remove_reference_t Arg2Nested_; + typedef std::remove_reference_t Arg3Nested_; + enum { Flags = Arg1Nested_::Flags & RowMajorBit }; +}; +} // end namespace internal + +template +class CwiseTernaryOpImpl; + +/** \class CwiseTernaryOp + * \ingroup Core_Module + * + * \brief Generic expression where a coefficient-wise ternary operator is + * applied to two expressions + * + * \tparam TernaryOp template functor implementing the operator + * \tparam Arg1Type the type of the first argument + * \tparam Arg2Type the type of the second argument + * \tparam Arg3Type the type of the third argument + * + * This class represents an expression where a coefficient-wise ternary + * operator is applied to three expressions. 
+ * It is the return type of ternary operators, by which we mean only those + * ternary operators where + * all three arguments are Eigen expressions. + * For example, the return type of betainc(matrix1, matrix2, matrix3) is a + * CwiseTernaryOp. + * + * Most of the time, this is the only way that it is used, so you typically + * don't have to name + * CwiseTernaryOp types explicitly. + * + * \sa MatrixBase::ternaryExpr(const MatrixBase &, const + * MatrixBase &, const CustomTernaryOp &) const, class CwiseBinaryOp, + * class CwiseUnaryOp, class CwiseNullaryOp + */ +template +class CwiseTernaryOp : public CwiseTernaryOpImpl::StorageKind>, + internal::no_assignment_operator { + public: + typedef internal::remove_all_t Arg1; + typedef internal::remove_all_t Arg2; + typedef internal::remove_all_t Arg3; + + // require the sizes to match + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg2) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(Arg1, Arg3) + + // The index types should match + EIGEN_STATIC_ASSERT((internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + EIGEN_STATIC_ASSERT((internal::is_same::StorageKind, + typename internal::traits::StorageKind>::value), + STORAGE_KIND_MUST_MATCH) + + typedef typename CwiseTernaryOpImpl::StorageKind>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseTernaryOp) + + typedef typename internal::ref_selector::type Arg1Nested; + typedef typename internal::ref_selector::type Arg2Nested; + typedef typename internal::ref_selector::type Arg3Nested; + typedef std::remove_reference_t Arg1Nested_; + typedef std::remove_reference_t Arg2Nested_; + typedef std::remove_reference_t Arg3Nested_; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CwiseTernaryOp(const Arg1& a1, const Arg2& a2, const Arg3& a3, + const TernaryOp& func = TernaryOp()) + : m_arg1(a1), m_arg2(a2), m_arg3(a3), m_functor(func) { + eigen_assert(a1.rows() == a2.rows() && a1.cols() == a2.cols() && a1.rows() == a3.rows() && a1.cols() == 
a3.cols()); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rows() const { + // return the fixed size type if available to enable compile time + // optimizations + if (internal::traits>::RowsAtCompileTime == Dynamic && + internal::traits>::RowsAtCompileTime == Dynamic) + return m_arg3.rows(); + else if (internal::traits>::RowsAtCompileTime == Dynamic && + internal::traits>::RowsAtCompileTime == Dynamic) + return m_arg2.rows(); + else + return m_arg1.rows(); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index cols() const { + // return the fixed size type if available to enable compile time + // optimizations + if (internal::traits>::ColsAtCompileTime == Dynamic && + internal::traits>::ColsAtCompileTime == Dynamic) + return m_arg3.cols(); + else if (internal::traits>::ColsAtCompileTime == Dynamic && + internal::traits>::ColsAtCompileTime == Dynamic) + return m_arg2.cols(); + else + return m_arg1.cols(); + } + + /** \returns the first argument nested expression */ + EIGEN_DEVICE_FUNC const Arg1Nested_& arg1() const { return m_arg1; } + /** \returns the first argument nested expression */ + EIGEN_DEVICE_FUNC const Arg2Nested_& arg2() const { return m_arg2; } + /** \returns the third argument nested expression */ + EIGEN_DEVICE_FUNC const Arg3Nested_& arg3() const { return m_arg3; } + /** \returns the functor representing the ternary operation */ + EIGEN_DEVICE_FUNC const TernaryOp& functor() const { return m_functor; } + + protected: + Arg1Nested m_arg1; + Arg2Nested m_arg2; + Arg3Nested m_arg3; + const TernaryOp m_functor; +}; + +// Generic API dispatcher +template +class CwiseTernaryOpImpl : public internal::generic_xpr_base>::type { + public: + typedef typename internal::generic_xpr_base>::type Base; +}; + +} // end namespace Eigen + +#endif // EIGEN_CWISE_TERNARY_OP_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/CwiseUnaryOp.h b/o-voxel/third_party/eigen/Eigen/src/Core/CwiseUnaryOp.h new file mode 100644 index 
0000000000000000000000000000000000000000..61a8ffba9f95b3c6b648aeab5675780b2b5b88d0 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/CwiseUnaryOp.h @@ -0,0 +1,91 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2014 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CWISE_UNARY_OP_H +#define EIGEN_CWISE_UNARY_OP_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { +template +struct traits > : traits { + typedef typename result_of::type Scalar; + typedef typename XprType::Nested XprTypeNested; + typedef std::remove_reference_t XprTypeNested_; + enum { Flags = XprTypeNested_::Flags & RowMajorBit }; +}; +} // namespace internal + +template +class CwiseUnaryOpImpl; + +/** \class CwiseUnaryOp + * \ingroup Core_Module + * + * \brief Generic expression where a coefficient-wise unary operator is applied to an expression + * + * \tparam UnaryOp template functor implementing the operator + * \tparam XprType the type of the expression to which we are applying the unary operator + * + * This class represents an expression where a unary operator is applied to an expression. + * It is the return type of all operations taking exactly 1 input expression, regardless of the + * presence of other inputs such as scalars. For example, the operator* in the expression 3*matrix + * is considered unary, because only the right-hand side is an expression, and its + * return type is a specialization of CwiseUnaryOp. + * + * Most of the time, this is the only way that it is used, so you typically don't have to name + * CwiseUnaryOp types explicitly. 
+ * + * \sa MatrixBase::unaryExpr(const CustomUnaryOp &) const, class CwiseBinaryOp, class CwiseNullaryOp + */ +template +class CwiseUnaryOp : public CwiseUnaryOpImpl::StorageKind>, + internal::no_assignment_operator { + public: + typedef typename CwiseUnaryOpImpl::StorageKind>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryOp) + typedef typename internal::ref_selector::type XprTypeNested; + typedef internal::remove_all_t NestedExpression; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit CwiseUnaryOp(const XprType& xpr, const UnaryOp& func = UnaryOp()) + : m_xpr(xpr), m_functor(func) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const noexcept { return m_xpr.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept { return m_xpr.cols(); } + + /** \returns the functor representing the unary operation */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const UnaryOp& functor() const { return m_functor; } + + /** \returns the nested expression */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const internal::remove_all_t& nestedExpression() const { + return m_xpr; + } + + /** \returns the nested expression */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE internal::remove_all_t& nestedExpression() { return m_xpr; } + + protected: + XprTypeNested m_xpr; + const UnaryOp m_functor; +}; + +// Generic API dispatcher +template +class CwiseUnaryOpImpl : public internal::generic_xpr_base >::type { + public: + typedef typename internal::generic_xpr_base >::type Base; +}; + +} // end namespace Eigen + +#endif // EIGEN_CWISE_UNARY_OP_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/CwiseUnaryView.h b/o-voxel/third_party/eigen/Eigen/src/Core/CwiseUnaryView.h new file mode 100644 index 0000000000000000000000000000000000000000..55866b1814ce92e7b5da1ff64bdb915d230743a6 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/CwiseUnaryView.h @@ -0,0 +1,167 @@ +// This file is part of Eigen, a lightweight C++ template library +// for 
linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CWISE_UNARY_VIEW_H +#define EIGEN_CWISE_UNARY_VIEW_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { +template +struct traits > : traits { + typedef typename result_of::Scalar&)>::type1 ScalarRef; + static_assert(std::is_reference::value, "Views must return a reference type."); + typedef remove_all_t Scalar; + typedef typename MatrixType::Nested MatrixTypeNested; + typedef remove_all_t MatrixTypeNested_; + enum { + FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, + Flags = + traits::Flags & + (RowMajorBit | FlagsLvalueBit | DirectAccessBit), // FIXME DirectAccessBit should not be handled by expressions + MatrixTypeInnerStride = inner_stride_at_compile_time::ret, + // need to cast the sizeof's from size_t to int explicitly, otherwise: + // "error: no integral type can represent all of the enumerator values + InnerStrideAtCompileTime = + StrideType::InnerStrideAtCompileTime == 0 + ? (MatrixTypeInnerStride == Dynamic + ? int(Dynamic) + : int(MatrixTypeInnerStride) * int(sizeof(typename traits::Scalar) / sizeof(Scalar))) + : int(StrideType::InnerStrideAtCompileTime), + + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? (outer_stride_at_compile_time::ret == Dynamic + ? 
int(Dynamic) + : outer_stride_at_compile_time::ret * + int(sizeof(typename traits::Scalar) / sizeof(Scalar))) + : int(StrideType::OuterStrideAtCompileTime) + }; +}; + +// Generic API dispatcher +template ::value> +class CwiseUnaryViewImpl : public generic_xpr_base >::type { + public: + typedef typename generic_xpr_base >::type Base; +}; + +template +class CwiseUnaryViewImpl + : public dense_xpr_base >::type { + public: + typedef CwiseUnaryView Derived; + typedef typename dense_xpr_base >::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl) + + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(this->coeffRef(0)); } + + EIGEN_DEVICE_FUNC constexpr Index innerStride() const { + return StrideType::InnerStrideAtCompileTime != 0 ? int(StrideType::InnerStrideAtCompileTime) + : derived().nestedExpression().innerStride() * + sizeof(typename traits::Scalar) / sizeof(Scalar); + } + + EIGEN_DEVICE_FUNC constexpr Index outerStride() const { + return StrideType::OuterStrideAtCompileTime != 0 ? int(StrideType::OuterStrideAtCompileTime) + : derived().nestedExpression().outerStride() * + sizeof(typename traits::Scalar) / sizeof(Scalar); + } + + protected: + EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CwiseUnaryViewImpl) + + // Allow const access to coeffRef for the case of direct access being enabled. 
+ EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { + return internal::evaluator(derived()).coeffRef(index); + } + + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index row, Index col) const { + return internal::evaluator(derived()).coeffRef(row, col); + } +}; + +template +class CwiseUnaryViewImpl + : public CwiseUnaryViewImpl { + public: + typedef CwiseUnaryViewImpl Base; + typedef CwiseUnaryView Derived; + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryViewImpl) + + using Base::data; + EIGEN_DEVICE_FUNC inline Scalar* data() { return &(this->coeffRef(0)); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { + return internal::evaluator(derived()).coeffRef(row, col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + return internal::evaluator(derived()).coeffRef(index); + } + + protected: + EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(CwiseUnaryViewImpl) +}; + +} // namespace internal + +/** \class CwiseUnaryView + * \ingroup Core_Module + * + * \brief Generic lvalue expression of a coefficient-wise unary operator of a matrix or a vector + * + * \tparam ViewOp template functor implementing the view + * \tparam MatrixType the type of the matrix we are applying the unary operator + * + * This class represents a lvalue expression of a generic unary view operator of a matrix or a vector. + * It is the return type of real() and imag(), and most of the time this is the only way it is used. 
+ * + * \sa MatrixBase::unaryViewExpr(const CustomUnaryOp &) const, class CwiseUnaryOp + */ +template +class CwiseUnaryView : public internal::CwiseUnaryViewImpl::StorageKind> { + public: + typedef typename internal::CwiseUnaryViewImpl::StorageKind>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(CwiseUnaryView) + typedef typename internal::ref_selector::non_const_type MatrixTypeNested; + typedef internal::remove_all_t NestedExpression; + + explicit EIGEN_DEVICE_FUNC inline CwiseUnaryView(MatrixType& mat, const ViewOp& func = ViewOp()) + : m_matrix(mat), m_functor(func) {} + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(CwiseUnaryView) + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const noexcept { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept { return m_matrix.cols(); } + + /** \returns the functor representing unary operation */ + EIGEN_DEVICE_FUNC const ViewOp& functor() const { return m_functor; } + + /** \returns the nested expression */ + EIGEN_DEVICE_FUNC const internal::remove_all_t& nestedExpression() const { return m_matrix; } + + /** \returns the nested expression */ + EIGEN_DEVICE_FUNC std::remove_reference_t& nestedExpression() { return m_matrix; } + + protected: + MatrixTypeNested m_matrix; + ViewOp m_functor; +}; + +} // namespace Eigen + +#endif // EIGEN_CWISE_UNARY_VIEW_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/DenseBase.h b/o-voxel/third_party/eigen/Eigen/src/Core/DenseBase.h new file mode 100644 index 0000000000000000000000000000000000000000..603319492e8d7d59ea7865ba44682e6f8b9f7aed --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/DenseBase.h @@ -0,0 +1,673 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007-2010 Benoit Jacob +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DENSEBASE_H +#define EIGEN_DENSEBASE_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +// The index type defined by EIGEN_DEFAULT_DENSE_INDEX_TYPE must be a signed type. +EIGEN_STATIC_ASSERT(NumTraits::IsSigned, THE_INDEX_TYPE_MUST_BE_A_SIGNED_TYPE) + +/** \class DenseBase + * \ingroup Core_Module + * + * \brief Base class for all dense matrices, vectors, and arrays + * + * This class is the base that is inherited by all dense objects (matrix, vector, arrays, + * and related expression types). The common Eigen API for dense objects is contained in this class. + * + * \tparam Derived is the derived type, e.g., a matrix type or an expression. + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_DENSEBASE_PLUGIN. + * + * \sa \blank \ref TopicClassHierarchy + */ +template +class DenseBase +#ifndef EIGEN_PARSED_BY_DOXYGEN + : public DenseCoeffsBase::value> +#else + : public DenseCoeffsBase +#endif // not EIGEN_PARSED_BY_DOXYGEN +{ + public: + /** Inner iterator type to iterate over the coefficients of a row or column. + * \sa class InnerIterator + */ + typedef Eigen::InnerIterator InnerIterator; + + typedef typename internal::traits::StorageKind StorageKind; + + /** + * \brief The type used to store indices + * \details This typedef is relevant for types that store multiple indices such as + * PermutationMatrix or Transpositions, otherwise it defaults to Eigen::Index + * \sa \blank \ref TopicPreprocessorDirectives, Eigen::Index, SparseMatrixBase. + */ + typedef typename internal::traits::StorageIndex StorageIndex; + + /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex, etc. 
*/ + typedef typename internal::traits::Scalar Scalar; + + /** The numeric type of the expression' coefficients, e.g. float, double, int or std::complex, etc. + * + * It is an alias for the Scalar type */ + typedef Scalar value_type; + + typedef typename NumTraits::Real RealScalar; + typedef DenseCoeffsBase::value> Base; + + using Base::coeff; + using Base::coeffByOuterInner; + using Base::colIndexByOuterInner; + using Base::cols; + using Base::const_cast_derived; + using Base::derived; + using Base::rowIndexByOuterInner; + using Base::rows; + using Base::size; + using Base::operator(); + using Base::operator[]; + using Base::colStride; + using Base::innerStride; + using Base::outerStride; + using Base::rowStride; + using Base::stride; + using Base::w; + using Base::x; + using Base::y; + using Base::z; + typedef typename Base::CoeffReturnType CoeffReturnType; + + enum { + + RowsAtCompileTime = internal::traits::RowsAtCompileTime, + /**< The number of rows at compile-time. This is just a copy of the value provided + * by the \a Derived type. If a value is not known at compile-time, + * it is set to the \a Dynamic constant. + * \sa MatrixBase::rows(), MatrixBase::cols(), ColsAtCompileTime, SizeAtCompileTime */ + + ColsAtCompileTime = internal::traits::ColsAtCompileTime, + /**< The number of columns at compile-time. This is just a copy of the value provided + * by the \a Derived type. If a value is not known at compile-time, + * it is set to the \a Dynamic constant. + * \sa MatrixBase::rows(), MatrixBase::cols(), RowsAtCompileTime, SizeAtCompileTime */ + + SizeAtCompileTime = (internal::size_of_xpr_at_compile_time::ret), + /**< This is equal to the number of coefficients, i.e. the number of + * rows times the number of columns, or to \a Dynamic if this is not + * known at compile-time. 
\sa RowsAtCompileTime, ColsAtCompileTime */ + + MaxRowsAtCompileTime = internal::traits::MaxRowsAtCompileTime, + /**< This value is equal to the maximum possible number of rows that this expression + * might have. If this expression might have an arbitrarily high number of rows, + * this value is set to \a Dynamic. + * + * This value is useful to know when evaluating an expression, in order to determine + * whether it is possible to avoid doing a dynamic memory allocation. + * + * \sa RowsAtCompileTime, MaxColsAtCompileTime, MaxSizeAtCompileTime + */ + + MaxColsAtCompileTime = internal::traits::MaxColsAtCompileTime, + /**< This value is equal to the maximum possible number of columns that this expression + * might have. If this expression might have an arbitrarily high number of columns, + * this value is set to \a Dynamic. + * + * This value is useful to know when evaluating an expression, in order to determine + * whether it is possible to avoid doing a dynamic memory allocation. + * + * \sa ColsAtCompileTime, MaxRowsAtCompileTime, MaxSizeAtCompileTime + */ + + MaxSizeAtCompileTime = internal::size_at_compile_time(internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime), + /**< This value is equal to the maximum possible number of coefficients that this expression + * might have. If this expression might have an arbitrarily high number of coefficients, + * this value is set to \a Dynamic. + * + * This value is useful to know when evaluating an expression, in order to determine + * whether it is possible to avoid doing a dynamic memory allocation. + * + * \sa SizeAtCompileTime, MaxRowsAtCompileTime, MaxColsAtCompileTime + */ + + IsVectorAtCompileTime = + internal::traits::RowsAtCompileTime == 1 || internal::traits::ColsAtCompileTime == 1, + /**< This is set to true if either the number of rows or the number of + * columns is known at compile-time to be equal to 1. 
Indeed, in that case, + * we are dealing with a column-vector (if there is only one column) or with + * a row-vector (if there is only one row). */ + + NumDimensions = int(MaxSizeAtCompileTime) == 1 ? 0 + : bool(IsVectorAtCompileTime) ? 1 + : 2, + /**< This value is equal to Tensor::NumDimensions, i.e. 0 for scalars, 1 for vectors, + * and 2 for matrices. + */ + + Flags = internal::traits::Flags, + /**< This stores expression \ref flags flags which may or may not be inherited by new expressions + * constructed from this one. See the \ref flags "list of flags". + */ + + IsRowMajor = int(Flags) & RowMajorBit, /**< True if this expression has row-major storage order. */ + + InnerSizeAtCompileTime = int(IsVectorAtCompileTime) ? int(SizeAtCompileTime) + : int(IsRowMajor) ? int(ColsAtCompileTime) + : int(RowsAtCompileTime), + + InnerStrideAtCompileTime = internal::inner_stride_at_compile_time::ret, + OuterStrideAtCompileTime = internal::outer_stride_at_compile_time::ret + }; + + typedef typename internal::find_best_packet::type PacketScalar; + + enum { IsPlainObjectBase = 0 }; + + /** The plain matrix type corresponding to this expression. + * \sa PlainObject */ + typedef Matrix::Scalar, internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime, + AutoAlign | (internal::traits::Flags & RowMajorBit ? RowMajor : ColMajor), + internal::traits::MaxRowsAtCompileTime, internal::traits::MaxColsAtCompileTime> + PlainMatrix; + + /** The plain array type corresponding to this expression. + * \sa PlainObject */ + typedef Array::Scalar, internal::traits::RowsAtCompileTime, + internal::traits::ColsAtCompileTime, + AutoAlign | (internal::traits::Flags & RowMajorBit ? RowMajor : ColMajor), + internal::traits::MaxRowsAtCompileTime, internal::traits::MaxColsAtCompileTime> + PlainArray; + + /** \brief The plain matrix or array type corresponding to this expression. + * + * This is not necessarily exactly the return type of eval(). 
In the case of plain matrices, + * the return type of eval() is a const reference to a matrix, not a matrix! It is however guaranteed + * that the return type of eval() is either PlainObject or const PlainObject&. + */ + typedef std::conditional_t::XprKind, MatrixXpr>::value, + PlainMatrix, PlainArray> + PlainObject; + + /** \returns the outer size. + * + * \note For a vector, this returns just 1. For a matrix (non-vector), this is the major dimension + * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of columns for a + * column-major matrix, and the number of rows for a row-major matrix. */ + EIGEN_DEVICE_FUNC constexpr Index outerSize() const { + return IsVectorAtCompileTime ? 1 : int(IsRowMajor) ? this->rows() : this->cols(); + } + + /** \returns the inner size. + * + * \note For a vector, this is just the size. For a matrix (non-vector), this is the minor dimension + * with respect to the \ref TopicStorageOrders "storage order", i.e., the number of rows for a + * column-major matrix, and the number of columns for a row-major matrix. */ + EIGEN_DEVICE_FUNC constexpr Index innerSize() const { + return IsVectorAtCompileTime ? this->size() : int(IsRowMajor) ? this->cols() : this->rows(); + } + + /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are + * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and + * does nothing else. + */ + EIGEN_DEVICE_FUNC void resize(Index newSize) { + EIGEN_ONLY_USED_FOR_DEBUG(newSize); + eigen_assert(newSize == this->size() && "DenseBase::resize() does not actually allow to resize."); + } + /** Only plain matrices/arrays, not expressions, may be resized; therefore the only useful resize methods are + * Matrix::resize() and Array::resize(). The present method only asserts that the new size equals the old size, and + * does nothing else. 
+ */ + EIGEN_DEVICE_FUNC void resize(Index rows, Index cols) { + EIGEN_ONLY_USED_FOR_DEBUG(rows); + EIGEN_ONLY_USED_FOR_DEBUG(cols); + eigen_assert(rows == this->rows() && cols == this->cols() && + "DenseBase::resize() does not actually allow to resize."); + } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal Represents a matrix with all coefficients equal to one another*/ + typedef CwiseNullaryOp, PlainObject> ConstantReturnType; + /** \internal Represents a matrix with all coefficients equal to zero*/ + typedef CwiseNullaryOp, PlainObject> ZeroReturnType; + /** \internal \deprecated Represents a vector with linearly spaced coefficients that allows sequential access only. */ + EIGEN_DEPRECATED typedef CwiseNullaryOp, PlainObject> SequentialLinSpacedReturnType; + /** \internal Represents a vector with linearly spaced coefficients that allows random access. */ + typedef CwiseNullaryOp, PlainObject> RandomAccessLinSpacedReturnType; + /** \internal Represents a vector with equally spaced coefficients that allows random access. */ + typedef CwiseNullaryOp, PlainObject> RandomAccessEqualSpacedReturnType; + /** \internal the return type of MatrixBase::eigenvalues() */ + typedef Matrix::Scalar>::Real, + internal::traits::ColsAtCompileTime, 1> + EigenvaluesReturnType; + +#endif // not EIGEN_PARSED_BY_DOXYGEN + + /** Copies \a other into *this. \returns a reference to *this. 
*/ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase& other); + + /** Special case of the template operator=, in order to prevent the compiler + * from generating a default operator= (issue hit with g++ 4.1) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase& other); + + template + EIGEN_DEVICE_FUNC Derived& operator=(const EigenBase& other); + + template + EIGEN_DEVICE_FUNC Derived& operator+=(const EigenBase& other); + + template + EIGEN_DEVICE_FUNC Derived& operator-=(const EigenBase& other); + + template + EIGEN_DEVICE_FUNC Derived& operator=(const ReturnByValue& func); + + /** \internal + * Copies \a other into *this without evaluating other. \returns a reference to *this. */ + template + /** \deprecated */ + EIGEN_DEPRECATED EIGEN_DEVICE_FUNC Derived& lazyAssign(const DenseBase& other); + + EIGEN_DEVICE_FUNC CommaInitializer operator<<(const Scalar& s); + + template + /** \deprecated it now returns \c *this */ + EIGEN_DEPRECATED const Derived& flagged() const { + return derived(); + } + + template + EIGEN_DEVICE_FUNC CommaInitializer operator<<(const DenseBase& other); + + typedef Transpose TransposeReturnType; + EIGEN_DEVICE_FUNC TransposeReturnType transpose(); + typedef Transpose ConstTransposeReturnType; + EIGEN_DEVICE_FUNC const ConstTransposeReturnType transpose() const; + EIGEN_DEVICE_FUNC void transposeInPlace(); + + EIGEN_DEVICE_FUNC static const ConstantReturnType Constant(Index rows, Index cols, const Scalar& value); + EIGEN_DEVICE_FUNC static const ConstantReturnType Constant(Index size, const Scalar& value); + EIGEN_DEVICE_FUNC static const ConstantReturnType Constant(const Scalar& value); + + EIGEN_DEPRECATED_WITH_REASON("The method may result in accuracy loss. 
Use .EqualSpaced() instead.") + EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(Sequential_t, Index size, const Scalar& low, + const Scalar& high); + EIGEN_DEPRECATED_WITH_REASON("The method may result in accuracy loss. Use .EqualSpaced() instead.") + EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(Sequential_t, const Scalar& low, + const Scalar& high); + + EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(Index size, const Scalar& low, + const Scalar& high); + EIGEN_DEVICE_FUNC static const RandomAccessLinSpacedReturnType LinSpaced(const Scalar& low, const Scalar& high); + + EIGEN_DEVICE_FUNC static const RandomAccessEqualSpacedReturnType EqualSpaced(Index size, const Scalar& low, + const Scalar& step); + EIGEN_DEVICE_FUNC static const RandomAccessEqualSpacedReturnType EqualSpaced(const Scalar& low, const Scalar& step); + + template + EIGEN_DEVICE_FUNC static const CwiseNullaryOp NullaryExpr(Index rows, Index cols, + const CustomNullaryOp& func); + template + EIGEN_DEVICE_FUNC static const CwiseNullaryOp NullaryExpr(Index size, + const CustomNullaryOp& func); + template + EIGEN_DEVICE_FUNC static const CwiseNullaryOp NullaryExpr(const CustomNullaryOp& func); + + EIGEN_DEVICE_FUNC static const ZeroReturnType Zero(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const ZeroReturnType Zero(Index size); + EIGEN_DEVICE_FUNC static const ZeroReturnType Zero(); + EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(Index size); + EIGEN_DEVICE_FUNC static const ConstantReturnType Ones(); + + EIGEN_DEVICE_FUNC void fill(const Scalar& value); + EIGEN_DEVICE_FUNC Derived& setConstant(const Scalar& value); + EIGEN_DEVICE_FUNC Derived& setLinSpaced(Index size, const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC Derived& setLinSpaced(const Scalar& low, const Scalar& high); + EIGEN_DEVICE_FUNC Derived& 
setEqualSpaced(Index size, const Scalar& low, const Scalar& step); + EIGEN_DEVICE_FUNC Derived& setEqualSpaced(const Scalar& low, const Scalar& step); + EIGEN_DEVICE_FUNC Derived& setZero(); + EIGEN_DEVICE_FUNC Derived& setOnes(); + EIGEN_DEVICE_FUNC Derived& setRandom(); + + template + EIGEN_DEVICE_FUNC bool isApprox(const DenseBase& other, + const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isMuchSmallerThan(const RealScalar& other, + const RealScalar& prec = NumTraits::dummy_precision()) const; + template + EIGEN_DEVICE_FUNC bool isMuchSmallerThan(const DenseBase& other, + const RealScalar& prec = NumTraits::dummy_precision()) const; + + EIGEN_DEVICE_FUNC bool isApproxToConstant(const Scalar& value, + const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isConstant(const Scalar& value, + const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isZero(const RealScalar& prec = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC bool isOnes(const RealScalar& prec = NumTraits::dummy_precision()) const; + + EIGEN_DEVICE_FUNC inline bool hasNaN() const; + EIGEN_DEVICE_FUNC inline bool allFinite() const; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*=(const Scalar& other); + template ::value, typename = std::enable_if_t> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator*=(const RealScalar& other); + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator/=(const Scalar& other); + template ::value, typename = std::enable_if_t> + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator/=(const RealScalar& other); + + typedef internal::add_const_on_value_type_t::type> EvalReturnType; + /** \returns the matrix or vector obtained by evaluating this expression. + * + * Notice that in the case of a plain matrix or vector (not an expression) this function just returns + * a const reference, in order to avoid a useless copy. 
+ * + * \warning Be careful with eval() and the auto C++ keyword, as detailed in this \link TopicPitfalls_auto_keyword page + * \endlink. + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE EvalReturnType eval() const { + // Even though MSVC does not honor strong inlining when the return type + // is a dynamic matrix, we desperately need strong inlining for fixed + // size types on MSVC. + return typename internal::eval::type(derived()); + } + + /** swaps *this with the expression \a other. + * + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void swap(const DenseBase& other) { + EIGEN_STATIC_ASSERT(!OtherDerived::IsPlainObjectBase, THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + eigen_assert(rows() == other.rows() && cols() == other.cols()); + call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op()); + } + + /** swaps *this with the matrix or array \a other. + * + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void swap(PlainObjectBase& other) { + eigen_assert(rows() == other.rows() && cols() == other.cols()); + call_assignment(derived(), other.derived(), internal::swap_assign_op()); + } + + EIGEN_DEVICE_FUNC inline const NestByValue nestByValue() const; + EIGEN_DEVICE_FUNC inline const ForceAlignedAccess forceAlignedAccess() const; + EIGEN_DEVICE_FUNC inline ForceAlignedAccess forceAlignedAccess(); + template + EIGEN_DEVICE_FUNC inline const std::conditional_t, Derived&> + forceAlignedAccessIf() const; + template + EIGEN_DEVICE_FUNC inline std::conditional_t, Derived&> forceAlignedAccessIf(); + + EIGEN_DEVICE_FUNC Scalar sum() const; + EIGEN_DEVICE_FUNC Scalar mean() const; + EIGEN_DEVICE_FUNC Scalar trace() const; + + EIGEN_DEVICE_FUNC Scalar prod() const; + + template + EIGEN_DEVICE_FUNC typename internal::traits::Scalar minCoeff() const; + template + EIGEN_DEVICE_FUNC typename internal::traits::Scalar maxCoeff() const; + + // By default, the fastest version with undefined NaN propagation semantics is + // used. 
+ // TODO(rmlarsen): Replace with default template argument when we move to + // c++11 or beyond. + EIGEN_DEVICE_FUNC inline typename internal::traits::Scalar minCoeff() const { + return minCoeff(); + } + EIGEN_DEVICE_FUNC inline typename internal::traits::Scalar maxCoeff() const { + return maxCoeff(); + } + + template + EIGEN_DEVICE_FUNC typename internal::traits::Scalar minCoeff(IndexType* row, IndexType* col) const; + template + EIGEN_DEVICE_FUNC typename internal::traits::Scalar maxCoeff(IndexType* row, IndexType* col) const; + template + EIGEN_DEVICE_FUNC typename internal::traits::Scalar minCoeff(IndexType* index) const; + template + EIGEN_DEVICE_FUNC typename internal::traits::Scalar maxCoeff(IndexType* index) const; + + // TODO(rmlarsen): Replace these methods with a default template argument. + template + EIGEN_DEVICE_FUNC inline typename internal::traits::Scalar minCoeff(IndexType* row, IndexType* col) const { + return minCoeff(row, col); + } + template + EIGEN_DEVICE_FUNC inline typename internal::traits::Scalar maxCoeff(IndexType* row, IndexType* col) const { + return maxCoeff(row, col); + } + template + EIGEN_DEVICE_FUNC inline typename internal::traits::Scalar minCoeff(IndexType* index) const { + return minCoeff(index); + } + template + EIGEN_DEVICE_FUNC inline typename internal::traits::Scalar maxCoeff(IndexType* index) const { + return maxCoeff(index); + } + + template + EIGEN_DEVICE_FUNC Scalar redux(const BinaryOp& func) const; + + template + EIGEN_DEVICE_FUNC void visit(Visitor& func) const; + + /** \returns a WithFormat proxy object allowing to print a matrix the with given + * format \a fmt. + * + * See class IOFormat for some examples. 
+ * + * \sa class IOFormat, class WithFormat + */ + inline const WithFormat format(const IOFormat& fmt) const { return WithFormat(derived(), fmt); } + + /** \returns the unique coefficient of a 1x1 expression */ + EIGEN_DEVICE_FUNC CoeffReturnType value() const { + EIGEN_STATIC_ASSERT_SIZE_1x1(Derived) eigen_assert(this->rows() == 1 && this->cols() == 1); + return derived().coeff(0, 0); + } + + EIGEN_DEVICE_FUNC bool all() const; + EIGEN_DEVICE_FUNC bool any() const; + EIGEN_DEVICE_FUNC Index count() const; + + typedef VectorwiseOp RowwiseReturnType; + typedef const VectorwiseOp ConstRowwiseReturnType; + typedef VectorwiseOp ColwiseReturnType; + typedef const VectorwiseOp ConstColwiseReturnType; + + /** \returns a VectorwiseOp wrapper of *this for broadcasting and partial reductions + * + * Example: \include MatrixBase_rowwise.cpp + * Output: \verbinclude MatrixBase_rowwise.out + * + * \sa colwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting + */ + // Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC inline ConstRowwiseReturnType rowwise() const { return ConstRowwiseReturnType(derived()); } + EIGEN_DEVICE_FUNC RowwiseReturnType rowwise(); + + /** \returns a VectorwiseOp wrapper of *this broadcasting and partial reductions + * + * Example: \include MatrixBase_colwise.cpp + * Output: \verbinclude MatrixBase_colwise.out + * + * \sa rowwise(), class VectorwiseOp, \ref TutorialReductionsVisitorsBroadcasting + */ + EIGEN_DEVICE_FUNC inline ConstColwiseReturnType colwise() const { return ConstColwiseReturnType(derived()); } + EIGEN_DEVICE_FUNC ColwiseReturnType colwise(); + + typedef CwiseNullaryOp, PlainObject> RandomReturnType; + static const RandomReturnType Random(Index rows, Index cols); + static const RandomReturnType Random(Index size); + static const RandomReturnType Random(); + + template + inline EIGEN_DEVICE_FUNC + CwiseTernaryOp::Scalar, + typename DenseBase::Scalar, Scalar>, + ThenDerived, ElseDerived, Derived> + 
select(const DenseBase& thenMatrix, const DenseBase& elseMatrix) const; + + template + inline EIGEN_DEVICE_FUNC + CwiseTernaryOp::Scalar, + typename DenseBase::Scalar, Scalar>, + ThenDerived, typename DenseBase::ConstantReturnType, Derived> + select(const DenseBase& thenMatrix, const typename DenseBase::Scalar& elseScalar) const; + + template + inline EIGEN_DEVICE_FUNC + CwiseTernaryOp::Scalar, + typename DenseBase::Scalar, Scalar>, + typename DenseBase::ConstantReturnType, ElseDerived, Derived> + select(const typename DenseBase::Scalar& thenScalar, const DenseBase& elseMatrix) const; + + template + RealScalar lpNorm() const; + + template + EIGEN_DEVICE_FUNC const Replicate replicate() const; + /** + * \return an expression of the replication of \c *this + * + * Example: \include MatrixBase_replicate_int_int.cpp + * Output: \verbinclude MatrixBase_replicate_int_int.out + * + * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate + */ + // Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC const Replicate replicate(Index rowFactor, Index colFactor) const { + return Replicate(derived(), rowFactor, colFactor); + } + + typedef Reverse ReverseReturnType; + typedef const Reverse ConstReverseReturnType; + EIGEN_DEVICE_FUNC ReverseReturnType reverse(); + /** This is the const version of reverse(). */ + // Code moved here due to a CUDA compiler bug + EIGEN_DEVICE_FUNC ConstReverseReturnType reverse() const { return ConstReverseReturnType(derived()); } + EIGEN_DEVICE_FUNC void reverseInPlace(); + +#ifdef EIGEN_PARSED_BY_DOXYGEN + /** STL-like RandomAccessIterator + * iterator type as returned by the begin() and end() methods. 
+ */ + typedef random_access_iterator_type iterator; + /** This is the const version of iterator (aka read-only) */ + typedef random_access_iterator_type const_iterator; +#else + typedef std::conditional_t<(Flags & DirectAccessBit) == DirectAccessBit, + internal::pointer_based_stl_iterator, + internal::generic_randaccess_stl_iterator > + iterator_type; + + typedef std::conditional_t<(Flags & DirectAccessBit) == DirectAccessBit, + internal::pointer_based_stl_iterator, + internal::generic_randaccess_stl_iterator > + const_iterator_type; + + // Stl-style iterators are supported only for vectors. + + typedef std::conditional_t iterator; + + typedef std::conditional_t const_iterator; +#endif + + inline iterator begin(); + inline const_iterator begin() const; + inline const_iterator cbegin() const; + inline iterator end(); + inline const_iterator end() const; + inline const_iterator cend() const; + + using RealViewReturnType = std::conditional_t::IsComplex, RealView, Derived&>; + using ConstRealViewReturnType = + std::conditional_t::IsComplex, RealView, const Derived&>; + + EIGEN_DEVICE_FUNC RealViewReturnType realView(); + EIGEN_DEVICE_FUNC ConstRealViewReturnType realView() const; + +#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::DenseBase +#define EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +#define EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF(COND) +#define EIGEN_DOC_UNARY_ADDONS(X, Y) +#include "../plugins/CommonCwiseUnaryOps.inc" +#include "../plugins/BlockMethods.inc" +#include "../plugins/IndexedViewMethods.inc" +#include "../plugins/ReshapedMethods.inc" +#ifdef EIGEN_DENSEBASE_PLUGIN +#include EIGEN_DENSEBASE_PLUGIN +#endif +#undef EIGEN_CURRENT_STORAGE_BASE_CLASS +#undef EIGEN_DOC_BLOCK_ADDONS_NOT_INNER_PANEL +#undef EIGEN_DOC_BLOCK_ADDONS_INNER_PANEL_IF +#undef EIGEN_DOC_UNARY_ADDONS + + // disable the use of evalTo for dense objects with a nice compilation error + template + EIGEN_DEVICE_FUNC inline void evalTo(Dest&) const { + 
EIGEN_STATIC_ASSERT((internal::is_same::value), + THE_EVAL_EVALTO_FUNCTION_SHOULD_NEVER_BE_CALLED_FOR_DENSE_OBJECTS); + } + + protected: + EIGEN_DEFAULT_COPY_CONSTRUCTOR(DenseBase) + /** Default constructor. Do nothing. */ +#ifdef EIGEN_INTERNAL_DEBUGGING + EIGEN_DEVICE_FUNC constexpr DenseBase() { + /* Just checks for self-consistency of the flags. + * Only do it when debugging Eigen, as this borders on paranoia and could slow compilation down + */ + EIGEN_STATIC_ASSERT( + (internal::check_implication(MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1, int(IsRowMajor)) && + internal::check_implication(MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1, int(!IsRowMajor))), + INVALID_STORAGE_ORDER_FOR_THIS_VECTOR_EXPRESSION) + } +#else + EIGEN_DEVICE_FUNC constexpr DenseBase() = default; +#endif + + private: + EIGEN_DEVICE_FUNC explicit DenseBase(int); + EIGEN_DEVICE_FUNC DenseBase(int, int); + template + EIGEN_DEVICE_FUNC explicit DenseBase(const DenseBase&); +}; + +/** Free-function swap. + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + // Use forwarding references to capture all combinations of cv-qualified l+r-value cases. + std::enable_if_t>, std::decay_t>::value && + std::is_base_of>, std::decay_t>::value, + void> + swap(DerivedA&& a, DerivedB&& b) { + a.swap(b); +} + +} // end namespace Eigen + +#endif // EIGEN_DENSEBASE_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/DenseCoeffsBase.h b/o-voxel/third_party/eigen/Eigen/src/Core/DenseCoeffsBase.h new file mode 100644 index 0000000000000000000000000000000000000000..68d769c8ac9b0b82046b03cec43c8e0eb788ad88 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/DenseCoeffsBase.h @@ -0,0 +1,587 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DENSECOEFFSBASE_H +#define EIGEN_DENSECOEFFSBASE_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { +template +struct add_const_on_value_type_if_arithmetic { + typedef std::conditional_t::value, T, add_const_on_value_type_t> type; +}; +} // namespace internal + +/** \brief Base class providing read-only coefficient access to matrices and arrays. + * \ingroup Core_Module + * \tparam Derived Type of the derived class + * + * \note #ReadOnlyAccessors Constant indicating read-only access + * + * This class defines the \c operator() \c const function and friends, which can be used to read specific + * entries of a matrix or array. + * + * \sa DenseCoeffsBase, DenseCoeffsBase, + * \ref TopicClassHierarchy + */ +template +class DenseCoeffsBase : public EigenBase { + public: + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + + // Explanation for this CoeffReturnType typedef. + // - This is the return type of the coeff() method. + // - The LvalueBit means exactly that we can offer a coeffRef() method, which means exactly that we can get references + // to coeffs, which means exactly that we can have coeff() return a const reference (as opposed to returning a value). + // - The DirectAccessBit means exactly that the underlying data of coefficients can be directly accessed as a plain + // strided array, which means exactly that the underlying data of coefficients does exist in memory, which means + // exactly that the coefficients is const-referencable, which means exactly that we can have coeff() return a const + // reference. 
For example, Map have DirectAccessBit but not LvalueBit, so that Map.coeff() + // does points to a const Scalar& which exists in memory, while does not allow coeffRef() as it would not provide a + // lvalue. Notice that DirectAccessBit and LvalueBit are mutually orthogonal. + // - The is_arithmetic check is required since "const int", "const double", etc. will cause warnings on some systems + // while the declaration of "const T", where T is a non arithmetic type does not. Always returning "const Scalar&" is + // not possible, since the underlying expressions might not offer a valid address the reference could be referring to. + typedef std::conditional_t::Flags&(LvalueBit | DirectAccessBit)), const Scalar&, + std::conditional_t::value, Scalar, const Scalar>> + CoeffReturnType; + + typedef typename internal::add_const_on_value_type_if_arithmetic::type>::type + PacketReturnType; + + typedef EigenBase Base; + using Base::cols; + using Base::derived; + using Base::rows; + using Base::size; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index rowIndexByOuterInner(Index outer, Index inner) const { + return int(Derived::RowsAtCompileTime) == 1 ? 0 + : int(Derived::ColsAtCompileTime) == 1 ? inner + : int(Derived::Flags) & RowMajorBit ? outer + : inner; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index colIndexByOuterInner(Index outer, Index inner) const { + return int(Derived::ColsAtCompileTime) == 1 ? 0 + : int(Derived::RowsAtCompileTime) == 1 ? inner + : int(Derived::Flags) & RowMajorBit ? inner + : outer; + } + + /** Short version: don't use this function, use + * \link operator()(Index,Index) const \endlink instead. + * + * Long version: this function is similar to + * \link operator()(Index,Index) const \endlink, but without the assertion. + * Use this for limiting the performance cost of debugging code when doing + * repeated coefficient access. Only use this when it is guaranteed that the + * parameters \a row and \a col are in range. 
+ * + * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this + * function equivalent to \link operator()(Index,Index) const \endlink. + * + * \sa operator()(Index,Index) const, coeffRef(Index,Index), coeff(Index) const + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index row, Index col) const { + eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); + return internal::evaluator(derived()).coeff(row, col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeffByOuterInner(Index outer, Index inner) const { + return coeff(rowIndexByOuterInner(outer, inner), colIndexByOuterInner(outer, inner)); + } + + /** \returns the coefficient at given the given row and column. + * + * \sa operator()(Index,Index), operator[](Index) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType operator()(Index row, Index col) const { + eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); + return coeff(row, col); + } + +#ifdef EIGEN_MULTIDIMENSIONAL_SUBSCRIPT + /** \returns the coefficient at given the given row and column. + * + * \sa operator[](Index,Index), operator[](Index) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType operator[](Index row, Index col) const { + return operator()(row, col); + } +#endif + + /** Short version: don't use this function, use + * \link operator[](Index) const \endlink instead. + * + * Long version: this function is similar to + * \link operator[](Index) const \endlink, but without the assertion. + * Use this for limiting the performance cost of debugging code when doing + * repeated coefficient access. Only use this when it is guaranteed that the + * parameter \a index is in range. + * + * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this + * function equivalent to \link operator[](Index) const \endlink. 
+ * + * \sa operator[](Index) const, coeffRef(Index), coeff(Index,Index) const + */ + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType coeff(Index index) const { + EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) + eigen_internal_assert(index >= 0 && index < size()); + return internal::evaluator(derived()).coeff(index); + } + + /** \returns the coefficient at given index. + * + * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit. + * + * \sa operator[](Index), operator()(Index,Index) const, x() const, y() const, + * z() const, w() const + */ + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType operator[](Index index) const { + EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, + THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) + eigen_assert(index >= 0 && index < size()); + return coeff(index); + } + + /** \returns the coefficient at given index. + * + * This is synonymous to operator[](Index) const. + * + * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit. + * + * \sa operator[](Index), operator()(Index,Index) const, x() const, y() const, + * z() const, w() const + */ + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType operator()(Index index) const { + eigen_assert(index >= 0 && index < size()); + return coeff(index); + } + + /** equivalent to operator[](0). */ + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType x() const { return (*this)[0]; } + + /** equivalent to operator[](1). */ + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType y() const { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 2, OUT_OF_RANGE_ACCESS); + return (*this)[1]; + } + + /** equivalent to operator[](2). 
*/ + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType z() const { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 3, OUT_OF_RANGE_ACCESS); + return (*this)[2]; + } + + /** equivalent to operator[](3). */ + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr CoeffReturnType w() const { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 4, OUT_OF_RANGE_ACCESS); + return (*this)[3]; + } + + /** \internal + * \returns the packet of coefficients starting at the given row and column. It is your responsibility + * to ensure that a packet really starts there. This method is only available on expressions having the + * PacketAccessBit. + * + * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select + * the appropriate vectorization instruction. Aligned access is faster, but is only possible for packets + * starting at an address which is a multiple of the packet size. + */ + + template + EIGEN_STRONG_INLINE PacketReturnType packet(Index row, Index col) const { + typedef typename internal::packet_traits::type DefaultPacketType; + eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); + return internal::evaluator(derived()).template packet(row, col); + } + + /** \internal */ + template + EIGEN_STRONG_INLINE PacketReturnType packetByOuterInner(Index outer, Index inner) const { + return packet(rowIndexByOuterInner(outer, inner), colIndexByOuterInner(outer, inner)); + } + + /** \internal + * \returns the packet of coefficients starting at the given index. It is your responsibility + * to ensure that a packet really starts there. This method is only available on expressions having the + * PacketAccessBit and the LinearAccessBit. + * + * The \a LoadMode parameter may have the value \a #Aligned or \a #Unaligned. Its effect is to select + * the appropriate vectorization instruction. 
Aligned access is faster, but is only possible for packets + * starting at an address which is a multiple of the packet size. + */ + + template + EIGEN_STRONG_INLINE PacketReturnType packet(Index index) const { + EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) + typedef typename internal::packet_traits::type DefaultPacketType; + eigen_internal_assert(index >= 0 && index < size()); + return internal::evaluator(derived()).template packet(index); + } + + protected: + // explanation: DenseBase is doing "using ..." on the methods from DenseCoeffsBase. + // But some methods are only available in the DirectAccess case. + // So we add dummy methods here with these names, so that "using... " doesn't fail. + // It's not private so that the child class DenseBase can access them, and it's not public + // either since it's an implementation detail, so has to be protected. + void coeffRef(); + void coeffRefByOuterInner(); + void writePacket(); + void writePacketByOuterInner(); + void copyCoeff(); + void copyCoeffByOuterInner(); + void copyPacket(); + void copyPacketByOuterInner(); + void stride(); + void innerStride(); + void outerStride(); + void rowStride(); + void colStride(); +}; + +/** \brief Base class providing read/write coefficient access to matrices and arrays. + * \ingroup Core_Module + * \tparam Derived Type of the derived class + * + * \note #WriteAccessors Constant indicating read/write access + * + * This class defines the non-const \c operator() function and friends, which can be used to write specific + * entries of a matrix or array. This class inherits DenseCoeffsBase which + * defines the const variant for reading specific entries. 
+ * + * \sa DenseCoeffsBase, \ref TopicClassHierarchy + */ +template +class DenseCoeffsBase : public DenseCoeffsBase { + public: + typedef DenseCoeffsBase Base; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + + using Base::coeff; + using Base::colIndexByOuterInner; + using Base::cols; + using Base::derived; + using Base::rowIndexByOuterInner; + using Base::rows; + using Base::size; + using Base::operator[]; + using Base::operator(); + using Base::w; + using Base::x; + using Base::y; + using Base::z; + + /** Short version: don't use this function, use + * \link operator()(Index,Index) \endlink instead. + * + * Long version: this function is similar to + * \link operator()(Index,Index) \endlink, but without the assertion. + * Use this for limiting the performance cost of debugging code when doing + * repeated coefficient access. Only use this when it is guaranteed that the + * parameters \a row and \a col are in range. + * + * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this + * function equivalent to \link operator()(Index,Index) \endlink. + * + * \sa operator()(Index,Index), coeff(Index, Index) const, coeffRef(Index) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index row, Index col) { + eigen_internal_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); + return internal::evaluator(derived()).coeffRef(row, col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRefByOuterInner(Index outer, Index inner) { + return coeffRef(rowIndexByOuterInner(outer, inner), colIndexByOuterInner(outer, inner)); + } + + /** \returns a reference to the coefficient at given the given row and column. 
+ * + * \sa operator[](Index) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& operator()(Index row, Index col) { + eigen_assert(row >= 0 && row < rows() && col >= 0 && col < cols()); + return coeffRef(row, col); + } + +#ifdef EIGEN_MULTIDIMENSIONAL_SUBSCRIPT + /** \returns a reference to the coefficient at given the given row and column. + * + * \sa operator[](Index) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& operator[](Index row, Index col) { + return operator()(row, col); + } +#endif + + /** Short version: don't use this function, use + * \link operator[](Index) \endlink instead. + * + * Long version: this function is similar to + * \link operator[](Index) \endlink, but without the assertion. + * Use this for limiting the performance cost of debugging code when doing + * repeated coefficient access. Only use this when it is guaranteed that the + * parameters \a row and \a col are in range. + * + * If EIGEN_INTERNAL_DEBUGGING is defined, an assertion will be made, making this + * function equivalent to \link operator[](Index) \endlink. + * + * \sa operator[](Index), coeff(Index) const, coeffRef(Index,Index) + */ + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index index) { + EIGEN_STATIC_ASSERT(internal::evaluator::Flags & LinearAccessBit, + THIS_COEFFICIENT_ACCESSOR_TAKING_ONE_ACCESS_IS_ONLY_FOR_EXPRESSIONS_ALLOWING_LINEAR_ACCESS) + eigen_internal_assert(index >= 0 && index < size()); + return internal::evaluator(derived()).coeffRef(index); + } + + /** \returns a reference to the coefficient at given index. + * + * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit. 
+ * + * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w() + */ + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& operator[](Index index) { + EIGEN_STATIC_ASSERT(Derived::IsVectorAtCompileTime, + THE_BRACKET_OPERATOR_IS_ONLY_FOR_VECTORS__USE_THE_PARENTHESIS_OPERATOR_INSTEAD) + eigen_assert(index >= 0 && index < size()); + return coeffRef(index); + } + + /** \returns a reference to the coefficient at given index. + * + * This is synonymous to operator[](Index). + * + * This method is allowed only for vector expressions, and for matrix expressions having the LinearAccessBit. + * + * \sa operator[](Index) const, operator()(Index,Index), x(), y(), z(), w() + */ + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& operator()(Index index) { + eigen_assert(index >= 0 && index < size()); + return coeffRef(index); + } + + /** equivalent to operator[](0). */ + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& x() { return (*this)[0]; } + + /** equivalent to operator[](1). */ + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& y() { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 2, OUT_OF_RANGE_ACCESS); + return (*this)[1]; + } + + /** equivalent to operator[](2). */ + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& z() { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 3, OUT_OF_RANGE_ACCESS); + return (*this)[2]; + } + + /** equivalent to operator[](3). */ + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& w() { + EIGEN_STATIC_ASSERT(Derived::SizeAtCompileTime == -1 || Derived::SizeAtCompileTime >= 4, OUT_OF_RANGE_ACCESS); + return (*this)[3]; + } +}; + +/** \brief Base class providing direct read-only coefficient access to matrices and arrays. 
+ * \ingroup Core_Module + * \tparam Derived Type of the derived class + * + * \note #DirectAccessors Constant indicating direct access + * + * This class defines functions to work with strides which can be used to access entries directly. This class + * inherits DenseCoeffsBase which defines functions to access entries read-only using + * \c operator() . + * + * \sa \blank \ref TopicClassHierarchy + */ +template +class DenseCoeffsBase : public DenseCoeffsBase { + public: + typedef DenseCoeffsBase Base; + typedef typename internal::traits::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + using Base::cols; + using Base::derived; + using Base::rows; + using Base::size; + + /** \returns the pointer increment between two consecutive elements within a slice in the inner direction. + * + * \sa outerStride(), rowStride(), colStride() + */ + EIGEN_DEVICE_FUNC constexpr Index innerStride() const { return derived().innerStride(); } + + /** \returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns + * in a column-major matrix). + * + * \sa innerStride(), rowStride(), colStride() + */ + EIGEN_DEVICE_FUNC constexpr Index outerStride() const { return derived().outerStride(); } + + // FIXME shall we remove it ? + constexpr Index stride() const { return Derived::IsVectorAtCompileTime ? innerStride() : outerStride(); } + + /** \returns the pointer increment between two consecutive rows. + * + * \sa innerStride(), outerStride(), colStride() + */ + EIGEN_DEVICE_FUNC constexpr Index rowStride() const { return Derived::IsRowMajor ? outerStride() : innerStride(); } + + /** \returns the pointer increment between two consecutive columns. + * + * \sa innerStride(), outerStride(), rowStride() + */ + EIGEN_DEVICE_FUNC constexpr Index colStride() const { return Derived::IsRowMajor ? innerStride() : outerStride(); } +}; + +/** \brief Base class providing direct read/write coefficient access to matrices and arrays. 
+ * \ingroup Core_Module + * \tparam Derived Type of the derived class + * + * \note #DirectWriteAccessors Constant indicating direct access + * + * This class defines functions to work with strides which can be used to access entries directly. This class + * inherits DenseCoeffsBase which defines functions to access entries read/write using + * \c operator(). + * + * \sa \blank \ref TopicClassHierarchy + */ +template +class DenseCoeffsBase : public DenseCoeffsBase { + public: + typedef DenseCoeffsBase Base; + typedef typename internal::traits::Scalar Scalar; + typedef typename NumTraits::Real RealScalar; + + using Base::cols; + using Base::derived; + using Base::rows; + using Base::size; + + /** \returns the pointer increment between two consecutive elements within a slice in the inner direction. + * + * \sa outerStride(), rowStride(), colStride() + */ + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return derived().innerStride(); } + + /** \returns the pointer increment between two consecutive inner slices (for example, between two consecutive columns + * in a column-major matrix). + * + * \sa innerStride(), rowStride(), colStride() + */ + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return derived().outerStride(); } + + // FIXME shall we remove it ? + constexpr Index stride() const noexcept { return Derived::IsVectorAtCompileTime ? innerStride() : outerStride(); } + + /** \returns the pointer increment between two consecutive rows. + * + * \sa innerStride(), outerStride(), colStride() + */ + EIGEN_DEVICE_FUNC constexpr Index rowStride() const noexcept { + return Derived::IsRowMajor ? outerStride() : innerStride(); + } + + /** \returns the pointer increment between two consecutive columns. + * + * \sa innerStride(), outerStride(), rowStride() + */ + EIGEN_DEVICE_FUNC constexpr Index colStride() const noexcept { + return Derived::IsRowMajor ? 
innerStride() : outerStride(); + } +}; + +namespace internal { + +template +struct first_aligned_impl { + static constexpr Index run(const Derived&) noexcept { return 0; } +}; + +template +struct first_aligned_impl { + static inline Index run(const Derived& m) { return internal::first_aligned(m.data(), m.size()); } +}; + +/** \internal \returns the index of the first element of the array stored by \a m that is properly aligned with respect + * to \a Alignment for vectorization. + * + * \tparam Alignment requested alignment in Bytes. + * + * There is also the variant first_aligned(const Scalar*, Integer) defined in Memory.h. See it for more + * documentation. + */ +template +static inline Index first_aligned(const DenseBase& m) { + enum { ReturnZero = (int(evaluator::Alignment) >= Alignment) || !(Derived::Flags & DirectAccessBit) }; + return first_aligned_impl::run(m.derived()); +} + +template +static inline Index first_default_aligned(const DenseBase& m) { + typedef typename Derived::Scalar Scalar; + typedef typename packet_traits::type DefaultPacketType; + return internal::first_aligned::alignment), Derived>(m); +} + +template ::ret> +struct inner_stride_at_compile_time { + enum { ret = traits::InnerStrideAtCompileTime }; +}; + +template +struct inner_stride_at_compile_time { + enum { ret = 0 }; +}; + +template ::ret> +struct outer_stride_at_compile_time { + enum { ret = traits::OuterStrideAtCompileTime }; +}; + +template +struct outer_stride_at_compile_time { + enum { ret = 0 }; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_DENSECOEFFSBASE_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/DenseStorage.h b/o-voxel/third_party/eigen/Eigen/src/Core/DenseStorage.h new file mode 100644 index 0000000000000000000000000000000000000000..ae7e5b4c75aefdd9f78d7db99f2914e62eef7578 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/DenseStorage.h @@ -0,0 +1,573 @@ +// This file is part of Eigen, a lightweight C++ template 
library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2009 Benoit Jacob +// Copyright (C) 2010-2013 Hauke Heibel +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATRIXSTORAGE_H +#define EIGEN_MATRIXSTORAGE_H + +#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN +#define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) \ + X; \ + EIGEN_DENSE_STORAGE_CTOR_PLUGIN; +#else +#define EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(X) +#endif + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +#if defined(EIGEN_DISABLE_UNALIGNED_ARRAY_ASSERT) +#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(Alignment) +#else +#define EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(Alignment) \ + eigen_assert((is_constant_evaluated() || (std::uintptr_t(array) % Alignment == 0)) && \ + "this assertion is explained here: " \ + "http://eigen.tuxfamily.org/dox-devel/group__TopicUnalignedArrayAssert.html" \ + " **** READ THIS WEB PAGE !!! ****"); +#endif + +#if EIGEN_STACK_ALLOCATION_LIMIT +#define EIGEN_MAKE_STACK_ALLOCATION_ASSERT(X) \ + EIGEN_STATIC_ASSERT(X <= EIGEN_STACK_ALLOCATION_LIMIT, OBJECT_ALLOCATED_ON_STACK_IS_TOO_BIG) +#else +#define EIGEN_MAKE_STACK_ALLOCATION_ASSERT(X) +#endif + +/** \internal + * Static array. If the MatrixOrArrayOptions require auto-alignment, the array will be automatically aligned: + * to 16 bytes boundary if the total size is a multiple of 16 bytes. 
+ */ + +template ::value> +struct plain_array { + EIGEN_ALIGN_TO_BOUNDARY(Alignment) T array[Size]; +#if defined(EIGEN_NO_DEBUG) || defined(EIGEN_TESTING_PLAINOBJECT_CTOR) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() = default; +#else + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() { + EIGEN_MAKE_UNALIGNED_ARRAY_ASSERT(Alignment) + EIGEN_MAKE_STACK_ALLOCATION_ASSERT(Size * sizeof(T)) + } +#endif +}; + +template +struct plain_array { + // on some 32-bit platforms, stack-allocated arrays are aligned to 4 bytes, not the preferred alignment of T + EIGEN_ALIGN_TO_BOUNDARY(alignof(T)) T array[Size]; +#if defined(EIGEN_NO_DEBUG) || defined(EIGEN_TESTING_PLAINOBJECT_CTOR) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() = default; +#else + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr plain_array() { EIGEN_MAKE_STACK_ALLOCATION_ASSERT(Size * sizeof(T)) } +#endif +}; + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap_plain_array(plain_array& a, + plain_array& b, + Index a_size, Index b_size) { + Index common_size = numext::mini(a_size, b_size); + std::swap_ranges(a.array, a.array + common_size, b.array); + if (a_size > b_size) + smart_copy(a.array + common_size, a.array + a_size, b.array + common_size); + else if (b_size > a_size) + smart_copy(b.array + common_size, b.array + b_size, a.array + common_size); +} + +template +class DenseStorage_impl { + plain_array m_data; + + public: +#ifndef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; +#else + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = Size) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = 
Size) + smart_copy(other.m_data.array, other.m_data.array + Size, m_data.array); + } +#endif + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index /*cols*/) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) { + numext::swap(m_data, other.m_data); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/, + Index /*cols*/) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index /*cols*/) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; } +}; +template +class DenseStorage_impl { + plain_array m_data; + Index m_rows = 0; + + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) + : m_rows(other.m_rows) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) + smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index rows, Index /*cols*/) + : m_rows(rows) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + EIGEN_UNUSED_VARIABLE(size) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { + smart_copy(other.m_data.array, other.m_data.array + other.size(), 
m_data.array); + m_rows = other.m_rows; + return *this; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) { + swap_plain_array(m_data, other.m_data, size(), other.size()); + numext::swap(m_rows, other.m_rows); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index /*cols*/) { + m_rows = rows; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index /*cols*/) { + m_rows = rows; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; } +}; +template +class DenseStorage_impl { + plain_array m_data; + Index m_cols = 0; + + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) + : m_cols(other.m_cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) + smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index /*rows*/, Index cols) + : m_cols(cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + EIGEN_UNUSED_VARIABLE(size) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { + smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); + m_cols = other.m_cols; + return *this; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) { + swap_plain_array(m_data, 
other.m_data, size(), other.size()); + numext::swap(m_cols, other.m_cols); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/, Index cols) { + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index cols) { + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; } +}; +template +class DenseStorage_impl { + plain_array m_data; + Index m_rows = 0; + Index m_cols = 0; + + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) + : m_rows(other.m_rows), m_cols(other.m_cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) + smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index rows, Index cols) + : m_rows(rows), m_cols(cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + EIGEN_UNUSED_VARIABLE(size) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { + smart_copy(other.m_data.array, other.m_data.array + other.size(), m_data.array); + m_rows = other.m_rows; + m_cols = other.m_cols; + return *this; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) { + swap_plain_array(m_data, other.m_data, size(), other.size()); + numext::swap(m_rows, other.m_rows); + numext::swap(m_cols, 
other.m_cols); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index cols) { + m_rows = rows; + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index cols) { + m_rows = rows; + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data.array; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data.array; } +}; +// null matrix variants +template +class DenseStorage_impl { + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index /*cols*/) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl&) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/, + Index /*cols*/) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index /*cols*/) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* 
data() const { return nullptr; } +}; +template +class DenseStorage_impl { + Index m_rows = 0; + + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index rows, Index /*cols*/) + : m_rows(rows) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { + numext::swap(m_rows, other.m_rows); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index /*cols*/) { + m_rows = rows; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index /*cols*/) { + m_rows = rows; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; } +}; +template +class DenseStorage_impl { + Index m_cols = 0; + + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index /*rows*/, Index cols) + : m_cols(cols) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { + 
numext::swap(m_cols, other.m_cols); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index /*rows*/, Index cols) { + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index /*rows*/, Index cols) { + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; } +}; +template +class DenseStorage_impl { + Index m_rows = 0; + Index m_cols = 0; + + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index /*size*/, Index rows, Index cols) + : m_rows(rows), m_cols(cols) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { + numext::swap(m_rows, other.m_rows); + numext::swap(m_cols, other.m_cols); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index /*size*/, Index rows, Index cols) { + m_rows = rows; + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index /*size*/, Index rows, Index cols) { + m_rows = rows; + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() 
const { return m_rows * m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return nullptr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return nullptr; } +}; +// fixed-size matrix with dynamic memory allocation not currently supported +template +class DenseStorage_impl {}; +// dynamic-sized variants +template +class DenseStorage_impl { + static constexpr bool Align = (Options & DontAlign) == 0; + T* m_data = nullptr; + Index m_rows = 0; + + public: + static constexpr int Size = Dynamic; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) + : m_data(conditional_aligned_new_auto(other.size())), m_rows(other.m_rows) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) + smart_copy(other.m_data, other.m_data + other.size(), m_data); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index rows, Index /*cols*/) + : m_data(conditional_aligned_new_auto(size)), m_rows(rows) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept + : m_data(other.m_data), m_rows(other.m_rows) { + other.m_data = nullptr; + other.m_rows = 0; + } + EIGEN_DEVICE_FUNC ~DenseStorage_impl() { conditional_aligned_delete_auto(m_data, size()); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { + resize(other.size(), other.rows(), other.cols()); + smart_copy(other.m_data, other.m_data + other.size(), m_data); + return *this; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept { + this->swap(other); + return *this; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { + numext::swap(m_data, 
other.m_data); + numext::swap(m_rows, other.m_rows); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index size, Index rows, Index /*cols*/) { + m_data = conditional_aligned_realloc_new_auto(m_data, size, this->size()); + m_rows = rows; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index size, Index rows, Index /*cols*/) { + Index oldSize = this->size(); + if (oldSize != size) { + conditional_aligned_delete_auto(m_data, oldSize); + m_data = conditional_aligned_new_auto(size); + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + } + m_rows = rows; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * Cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data; } +}; +template +class DenseStorage_impl { + static constexpr bool Align = (Options & DontAlign) == 0; + T* m_data = nullptr; + Index m_cols = 0; + + public: + static constexpr int Size = Dynamic; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) + : m_data(conditional_aligned_new_auto(other.size())), m_cols(other.m_cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) + smart_copy(other.m_data, other.m_data + other.size(), m_data); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index /*rows*/, Index cols) + : m_data(conditional_aligned_new_auto(size)), m_cols(cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept + : m_data(other.m_data), 
m_cols(other.m_cols) { + other.m_data = nullptr; + other.m_cols = 0; + } + EIGEN_DEVICE_FUNC ~DenseStorage_impl() { conditional_aligned_delete_auto(m_data, size()); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { + resize(other.size(), other.rows(), other.cols()); + smart_copy(other.m_data, other.m_data + other.size(), m_data); + return *this; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept { + this->swap(other); + return *this; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { + numext::swap(m_data, other.m_data); + numext::swap(m_cols, other.m_cols); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index size, Index /*rows*/, Index cols) { + m_data = conditional_aligned_realloc_new_auto(m_data, size, this->size()); + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index size, Index /*rows*/, Index cols) { + Index oldSize = this->size(); + if (oldSize != size) { + conditional_aligned_delete_auto(m_data, oldSize); + m_data = conditional_aligned_new_auto(size); + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + } + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return Rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return Rows * m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data; } +}; +template +class DenseStorage_impl { + static constexpr bool Align = (Options & DontAlign) == 0; + T* m_data = nullptr; + Index m_rows = 0; + Index m_cols = 0; + + public: + static constexpr int Size = Dynamic; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE 
constexpr DenseStorage_impl() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(const DenseStorage_impl& other) + : m_data(conditional_aligned_new_auto(other.size())), m_rows(other.m_rows), m_cols(other.m_cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN(Index size = other.size()) + smart_copy(other.m_data, other.m_data + other.size(), m_data); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(Index size, Index rows, Index cols) + : m_data(conditional_aligned_new_auto(size)), m_rows(rows), m_cols(cols) { + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl(DenseStorage_impl&& other) noexcept + : m_data(other.m_data), m_rows(other.m_rows), m_cols(other.m_cols) { + other.m_data = nullptr; + other.m_rows = 0; + other.m_cols = 0; + } + EIGEN_DEVICE_FUNC ~DenseStorage_impl() { conditional_aligned_delete_auto(m_data, size()); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(const DenseStorage_impl& other) { + resize(other.size(), other.rows(), other.cols()); + smart_copy(other.m_data, other.m_data + other.size(), m_data); + return *this; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage_impl& operator=(DenseStorage_impl&& other) noexcept { + this->swap(other); + return *this; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void swap(DenseStorage_impl& other) noexcept { + numext::swap(m_data, other.m_data); + numext::swap(m_rows, other.m_rows); + numext::swap(m_cols, other.m_cols); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void conservativeResize(Index size, Index rows, Index cols) { + m_data = conditional_aligned_realloc_new_auto(m_data, size, this->size()); + m_rows = rows; + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index size, Index rows, Index cols) { + Index oldSize = this->size(); + if (oldSize != size) { + 
conditional_aligned_delete_auto(m_data, oldSize); + m_data = conditional_aligned_new_auto(size); + EIGEN_INTERNAL_DENSE_STORAGE_CTOR_PLUGIN({}) + } + m_rows = rows; + m_cols = cols; + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const { return m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index size() const { return m_rows * m_cols; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr T* data() { return m_data; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const T* data() const { return m_data; } +}; +template +struct use_default_move { + static constexpr bool DynamicObject = Size == Dynamic; + static constexpr bool TrivialObject = + (!NumTraits::RequireInitialization) && (Rows >= 0) && (Cols >= 0) && (Size == Rows * Cols); + static constexpr bool value = DynamicObject || TrivialObject; +}; +} // end namespace internal + +/** \internal + * + * \class DenseStorage_impl + * \ingroup Core_Module + * + * \brief Stores the data of a matrix + * + * This class stores the data of fixed-size, dynamic-size or mixed matrices + * in a way as compact as possible. 
+ * + * \sa Matrix + */ +template ::value> +class DenseStorage : public internal::DenseStorage_impl { + using Base = internal::DenseStorage_impl; + + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(const DenseStorage&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(Index size, Index rows, Index cols) + : Base(size, rows, cols) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(const DenseStorage&) = default; + // if DenseStorage meets the requirements of use_default_move, then use the move construction and move assignment + // operation defined in DenseStorage_impl, or the compiler-generated version if none is defined + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(DenseStorage&&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(DenseStorage&&) = default; +}; +template +class DenseStorage + : public internal::DenseStorage_impl { + using Base = internal::DenseStorage_impl; + + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage() = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(const DenseStorage&) = default; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(Index size, Index rows, Index cols) + : Base(size, rows, cols) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(const DenseStorage&) = default; + // if DenseStorage does not meet the requirements of use_default_move, then defer to the copy construction and copy + // assignment behavior + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage(DenseStorage&& other) + : DenseStorage(static_cast(other)) {} + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr DenseStorage& operator=(DenseStorage&& other) { + *this = other; + return *this; + } +}; + +} // end namespace Eigen + +#endif // EIGEN_MATRIX_H diff --git 
a/o-voxel/third_party/eigen/Eigen/src/Core/DeviceWrapper.h b/o-voxel/third_party/eigen/Eigen/src/Core/DeviceWrapper.h new file mode 100644 index 0000000000000000000000000000000000000000..3accbde6eb3b6a4e7477cba084eb0c33e98aaf7e --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/DeviceWrapper.h @@ -0,0 +1,153 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2023 Charlie Schlosser +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DEVICEWRAPPER_H +#define EIGEN_DEVICEWRAPPER_H + +namespace Eigen { +template +struct DeviceWrapper { + using Base = EigenBase>; + using Scalar = typename Derived::Scalar; + + EIGEN_DEVICE_FUNC DeviceWrapper(Base& xpr, Device& device) : m_xpr(xpr.derived()), m_device(device) {} + EIGEN_DEVICE_FUNC DeviceWrapper(const Base& xpr, Device& device) : m_xpr(xpr.derived()), m_device(device) {} + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const EigenBase& other) { + using AssignOp = internal::assign_op; + internal::call_assignment(*this, other.derived(), AssignOp()); + return m_xpr; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator+=(const EigenBase& other) { + using AddAssignOp = internal::add_assign_op; + internal::call_assignment(*this, other.derived(), AddAssignOp()); + return m_xpr; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const EigenBase& other) { + using SubAssignOp = internal::sub_assign_op; + internal::call_assignment(*this, other.derived(), SubAssignOp()); + return m_xpr; + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& derived() { return m_xpr; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Device& device() { return m_device; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE NoAlias noalias() { + return 
NoAlias(*this); + } + + Derived& m_xpr; + Device& m_device; +}; + +namespace internal { + +// this is where we differentiate between lazy assignment and specialized kernels (e.g. matrix products) +template ::Shape, + typename evaluator_traits::Shape>::Kind, + typename EnableIf = void> +struct AssignmentWithDevice; + +// unless otherwise specified, use the default product implementation +template +struct AssignmentWithDevice, Functor, Device, Dense2Dense, Weak> { + using SrcXprType = Product; + using Base = Assignment; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, const Functor& func, + Device&) { + Base::run(dst, src, func); + } +}; + +// specialization for coeffcient-wise assignment +template +struct AssignmentWithDevice { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, const Functor& func, + Device& device) { +#ifndef EIGEN_NO_DEBUG + internal::check_for_aliasing(dst, src); +#endif + + call_dense_assignment_loop(dst, src, func, device); + } +}; + +// this allows us to use the default evaluation scheme if it is not specialized for the device +template +struct dense_assignment_loop_with_device { + using Base = dense_assignment_loop; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Kernel& kernel, Device&) { Base::run(kernel); } +}; + +// entry point for a generic expression with device +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_assignment_no_alias(DeviceWrapper dst, + const Src& src, const Func& func) { + enum { + NeedToTranspose = ((int(Dst::RowsAtCompileTime) == 1 && int(Src::ColsAtCompileTime) == 1) || + (int(Dst::ColsAtCompileTime) == 1 && int(Src::RowsAtCompileTime) == 1)) && + int(Dst::SizeAtCompileTime) != 1 + }; + + using ActualDstTypeCleaned = std::conditional_t, Dst>; + using ActualDstType = std::conditional_t, Dst&>; + ActualDstType actualDst(dst.derived()); + + // TODO check whether this is the right place to 
perform these checks: + EIGEN_STATIC_ASSERT_LVALUE(Dst) + EIGEN_STATIC_ASSERT_SAME_MATRIX_SIZE(ActualDstTypeCleaned, Src) + EIGEN_CHECK_BINARY_COMPATIBILIY(Func, typename ActualDstTypeCleaned::Scalar, typename Src::Scalar); + + // this provides a mechanism for specializing simple assignments, matrix products, etc + AssignmentWithDevice::run(actualDst, src, func, dst.device()); +} + +// copy and pasted from AssignEvaluator except forward device to kernel +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void call_dense_assignment_loop(DstXprType& dst, const SrcXprType& src, + const Functor& func, Device& device) { + using DstEvaluatorType = evaluator; + using SrcEvaluatorType = evaluator; + + SrcEvaluatorType srcEvaluator(src); + + // NOTE To properly handle A = (A*A.transpose())/s with A rectangular, + // we need to resize the destination after the source evaluator has been created. + resize_if_allowed(dst, src, func); + + DstEvaluatorType dstEvaluator(dst); + + using Kernel = generic_dense_assignment_kernel; + + Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); + + dense_assignment_loop_with_device::run(kernel, device); +} + +} // namespace internal + +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DeviceWrapper EigenBase::device(Device& device) { + return DeviceWrapper(derived(), device); +} + +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DeviceWrapper EigenBase::device( + Device& device) const { + return DeviceWrapper(derived(), device); +} +} // namespace Eigen +#endif diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Diagonal.h b/o-voxel/third_party/eigen/Eigen/src/Core/Diagonal.h new file mode 100644 index 0000000000000000000000000000000000000000..c2ca88613b2555d60bd92ca1adf399b5f9a6d645 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Diagonal.h @@ -0,0 +1,219 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2007-2009 Benoit Jacob +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DIAGONAL_H +#define EIGEN_DIAGONAL_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +/** \class Diagonal + * \ingroup Core_Module + * + * \brief Expression of a diagonal/subdiagonal/superdiagonal in a matrix + * + * \tparam MatrixType the type of the object in which we are taking a sub/main/super diagonal + * \tparam DiagIndex the index of the sub/super diagonal. The default is 0 and it means the main diagonal. + * A positive value means a superdiagonal, a negative value means a subdiagonal. + * You can also use DynamicIndex so the index can be set at runtime. + * + * The matrix is not required to be square. + * + * This class represents an expression of the main diagonal, or any sub/super diagonal + * of a square matrix. It is the return type of MatrixBase::diagonal() and MatrixBase::diagonal(Index) and most of the + * time this is the only way it is used. + * + * \sa MatrixBase::diagonal(), MatrixBase::diagonal(Index) + */ + +namespace internal { +template +struct traits > : traits { + typedef typename ref_selector::type MatrixTypeNested; + typedef std::remove_reference_t MatrixTypeNested_; + typedef typename MatrixType::StorageKind StorageKind; + enum { + RowsAtCompileTime = (int(DiagIndex) == DynamicIndex || int(MatrixType::SizeAtCompileTime) == Dynamic) + ? Dynamic + : (plain_enum_min(MatrixType::RowsAtCompileTime - plain_enum_max(-DiagIndex, 0), + MatrixType::ColsAtCompileTime - plain_enum_max(DiagIndex, 0))), + ColsAtCompileTime = 1, + MaxRowsAtCompileTime = + int(MatrixType::MaxSizeAtCompileTime) == Dynamic ? Dynamic + : DiagIndex == DynamicIndex + ? 
min_size_prefer_fixed(MatrixType::MaxRowsAtCompileTime, MatrixType::MaxColsAtCompileTime) + : (plain_enum_min(MatrixType::MaxRowsAtCompileTime - plain_enum_max(-DiagIndex, 0), + MatrixType::MaxColsAtCompileTime - plain_enum_max(DiagIndex, 0))), + MaxColsAtCompileTime = 1, + MaskLvalueBit = is_lvalue::value ? LvalueBit : 0, + Flags = (unsigned int)MatrixTypeNested_::Flags & (RowMajorBit | MaskLvalueBit | DirectAccessBit) & + ~RowMajorBit, // FIXME DirectAccessBit should not be handled by expressions + MatrixTypeOuterStride = outer_stride_at_compile_time::ret, + InnerStrideAtCompileTime = MatrixTypeOuterStride == Dynamic ? Dynamic : MatrixTypeOuterStride + 1, + OuterStrideAtCompileTime = 0 + }; +}; +} // namespace internal + +template +class Diagonal : public internal::dense_xpr_base >::type { + public: + enum { DiagIndex = DiagIndex_ }; + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Diagonal) + + EIGEN_DEVICE_FUNC explicit inline Diagonal(MatrixType& matrix, Index a_index = DiagIndex) + : m_matrix(matrix), m_index(a_index) { + eigen_assert(a_index <= m_matrix.cols() && -a_index <= m_matrix.rows()); + } + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Diagonal) + + EIGEN_DEVICE_FUNC inline Index rows() const { + return m_index.value() < 0 ? 
numext::mini(m_matrix.cols(), m_matrix.rows() + m_index.value()) + : numext::mini(m_matrix.rows(), m_matrix.cols() - m_index.value()); + } + + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return 1; } + + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return m_matrix.outerStride() + 1; } + + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return 0; } + + typedef std::conditional_t::value, Scalar, const Scalar> ScalarWithConstIfNotLvalue; + + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue* data() { return &(m_matrix.coeffRef(rowOffset(), colOffset())); } + EIGEN_DEVICE_FUNC inline const Scalar* data() const { return &(m_matrix.coeffRef(rowOffset(), colOffset())); } + + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index) { + EIGEN_STATIC_ASSERT_LVALUE(MatrixType) + return m_matrix.coeffRef(row + rowOffset(), row + colOffset()); + } + + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index row, Index) const { + return m_matrix.coeffRef(row + rowOffset(), row + colOffset()); + } + + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index row, Index) const { + return m_matrix.coeff(row + rowOffset(), row + colOffset()); + } + + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index idx) { + EIGEN_STATIC_ASSERT_LVALUE(MatrixType) + return m_matrix.coeffRef(idx + rowOffset(), idx + colOffset()); + } + + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index idx) const { + return m_matrix.coeffRef(idx + rowOffset(), idx + colOffset()); + } + + EIGEN_DEVICE_FUNC inline CoeffReturnType coeff(Index idx) const { + return m_matrix.coeff(idx + rowOffset(), idx + colOffset()); + } + + EIGEN_DEVICE_FUNC inline const internal::remove_all_t& nestedExpression() const { + return m_matrix; + } + + EIGEN_DEVICE_FUNC inline Index index() const { return m_index.value(); } + + protected: + typename internal::ref_selector::non_const_type m_matrix; + const internal::variable_if_dynamicindex m_index; + + private: + // some compilers may fail to 
optimize std::max etc in case of compile-time constants... + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index absDiagIndex() const noexcept { + return m_index.value() > 0 ? m_index.value() : -m_index.value(); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rowOffset() const noexcept { + return m_index.value() > 0 ? 0 : -m_index.value(); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index colOffset() const noexcept { + return m_index.value() > 0 ? m_index.value() : 0; + } + // trigger a compile-time error if someone try to call packet + template + typename MatrixType::PacketReturnType packet(Index) const; + template + typename MatrixType::PacketReturnType packet(Index, Index) const; +}; + +/** \returns an expression of the main diagonal of the matrix \c *this + * + * \c *this is not required to be square. + * + * Example: \include MatrixBase_diagonal.cpp + * Output: \verbinclude MatrixBase_diagonal.out + * + * \sa class Diagonal */ +template +EIGEN_DEVICE_FUNC inline typename MatrixBase::DiagonalReturnType MatrixBase::diagonal() { + return DiagonalReturnType(derived()); +} + +/** This is the const version of diagonal(). */ +template +EIGEN_DEVICE_FUNC inline const typename MatrixBase::ConstDiagonalReturnType MatrixBase::diagonal() + const { + return ConstDiagonalReturnType(derived()); +} + +/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this + * + * \c *this is not required to be square. + * + * The template parameter \a DiagIndex represent a super diagonal if \a DiagIndex > 0 + * and a sub diagonal otherwise. \a DiagIndex == 0 is equivalent to the main diagonal. + * + * Example: \include MatrixBase_diagonal_int.cpp + * Output: \verbinclude MatrixBase_diagonal_int.out + * + * \sa MatrixBase::diagonal(), class Diagonal */ +template +EIGEN_DEVICE_FUNC inline Diagonal MatrixBase::diagonal(Index index) { + return Diagonal(derived(), index); +} + +/** This is the const version of diagonal(Index). 
*/ +template +EIGEN_DEVICE_FUNC inline const Diagonal MatrixBase::diagonal(Index index) const { + return Diagonal(derived(), index); +} + +/** \returns an expression of the \a DiagIndex-th sub or super diagonal of the matrix \c *this + * + * \c *this is not required to be square. + * + * The template parameter \a DiagIndex represent a super diagonal if \a DiagIndex > 0 + * and a sub diagonal otherwise. \a DiagIndex == 0 is equivalent to the main diagonal. + * + * Example: \include MatrixBase_diagonal_template_int.cpp + * Output: \verbinclude MatrixBase_diagonal_template_int.out + * + * \sa MatrixBase::diagonal(), class Diagonal */ +template +template +EIGEN_DEVICE_FUNC inline Diagonal MatrixBase::diagonal() { + return Diagonal(derived()); +} + +/** This is the const version of diagonal(). */ +template +template +EIGEN_DEVICE_FUNC inline const Diagonal MatrixBase::diagonal() const { + return Diagonal(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_DIAGONAL_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/DiagonalMatrix.h b/o-voxel/third_party/eigen/Eigen/src/Core/DiagonalMatrix.h new file mode 100644 index 0000000000000000000000000000000000000000..7b1286a6b0f54a1aa791002f5c4b8f622a6a83dc --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/DiagonalMatrix.h @@ -0,0 +1,420 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// Copyright (C) 2007-2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_DIAGONALMATRIX_H +#define EIGEN_DIAGONALMATRIX_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +/** \class DiagonalBase + * \ingroup Core_Module + * + * \brief Base class for diagonal matrices and expressions + * + * This is the base class that is inherited by diagonal matrix and related expression + * types, which internally use a vector for storing the diagonal entries. Diagonal + * types always represent square matrices. + * + * \tparam Derived is the derived type, a DiagonalMatrix or DiagonalWrapper. + * + * \sa class DiagonalMatrix, class DiagonalWrapper + */ +template +class DiagonalBase : public EigenBase { + public: + typedef typename internal::traits::DiagonalVectorType DiagonalVectorType; + typedef typename DiagonalVectorType::Scalar Scalar; + typedef typename DiagonalVectorType::RealScalar RealScalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; + + enum { + RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, + ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, + MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + IsVectorAtCompileTime = 0, + Flags = NoPreferredStorageOrderBit + }; + + typedef Matrix + DenseMatrixType; + typedef DenseMatrixType DenseType; + typedef DiagonalMatrix + PlainObject; + + /** \returns a reference to the derived object. */ + EIGEN_DEVICE_FUNC inline const Derived& derived() const { return *static_cast(this); } + /** \returns a const reference to the derived object. */ + EIGEN_DEVICE_FUNC inline Derived& derived() { return *static_cast(this); } + + /** + * Constructs a dense matrix from \c *this. Note, this directly returns a dense matrix type, + * not an expression. + * \returns A dense matrix, with its diagonal entries set from the the derived object. 
*/ + EIGEN_DEVICE_FUNC DenseMatrixType toDenseMatrix() const { return derived(); } + + /** \returns a reference to the derived object's vector of diagonal coefficients. */ + EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return derived().diagonal(); } + /** \returns a const reference to the derived object's vector of diagonal coefficients. */ + EIGEN_DEVICE_FUNC inline DiagonalVectorType& diagonal() { return derived().diagonal(); } + + /** \returns the value of the coefficient as if \c *this was a dense matrix. */ + EIGEN_DEVICE_FUNC inline Scalar coeff(Index row, Index col) const { + eigen_assert(row >= 0 && col >= 0 && row < rows() && col <= cols()); + return row == col ? diagonal().coeff(row) : Scalar(0); + } + + /** \returns the number of rows. */ + EIGEN_DEVICE_FUNC constexpr Index rows() const { return diagonal().size(); } + /** \returns the number of columns. */ + EIGEN_DEVICE_FUNC constexpr Index cols() const { return diagonal().size(); } + + /** \returns the diagonal matrix product of \c *this by the dense matrix, \a matrix */ + template + EIGEN_DEVICE_FUNC const Product operator*( + const MatrixBase& matrix) const { + return Product(derived(), matrix.derived()); + } + + template + using DiagonalProductReturnType = DiagonalWrapper; + + /** \returns the diagonal matrix product of \c *this by the diagonal matrix \a other */ + template + EIGEN_DEVICE_FUNC const DiagonalProductReturnType operator*( + const DiagonalBase& other) const { + return diagonal().cwiseProduct(other.diagonal()).asDiagonal(); + } + + using DiagonalInverseReturnType = + DiagonalWrapper, const DiagonalVectorType>>; + + /** \returns the inverse \c *this. Computed as the coefficient-wise inverse of the diagonal. 
*/ + EIGEN_DEVICE_FUNC inline const DiagonalInverseReturnType inverse() const { + return diagonal().cwiseInverse().asDiagonal(); + } + + using DiagonalScaleReturnType = + DiagonalWrapper; + + /** \returns the product of \c *this by the scalar \a scalar */ + EIGEN_DEVICE_FUNC inline const DiagonalScaleReturnType operator*(const Scalar& scalar) const { + return (diagonal() * scalar).asDiagonal(); + } + + using ScaleDiagonalReturnType = + DiagonalWrapper; + + /** \returns the product of a scalar and the diagonal matrix \a other */ + EIGEN_DEVICE_FUNC friend inline const ScaleDiagonalReturnType operator*(const Scalar& scalar, + const DiagonalBase& other) { + return (scalar * other.diagonal()).asDiagonal(); + } + + template + using DiagonalSumReturnType = DiagonalWrapper; + + /** \returns the sum of \c *this and the diagonal matrix \a other */ + template + EIGEN_DEVICE_FUNC inline const DiagonalSumReturnType operator+( + const DiagonalBase& other) const { + return (diagonal() + other.diagonal()).asDiagonal(); + } + + template + using DiagonalDifferenceReturnType = DiagonalWrapper; + + /** \returns the difference of \c *this and the diagonal matrix \a other */ + template + EIGEN_DEVICE_FUNC inline const DiagonalDifferenceReturnType operator-( + const DiagonalBase& other) const { + return (diagonal() - other.diagonal()).asDiagonal(); + } +}; + +/** \class DiagonalMatrix + * \ingroup Core_Module + * + * \brief Represents a diagonal matrix with its storage + * + * \tparam Scalar_ the type of coefficients + * \tparam SizeAtCompileTime the dimension of the matrix, or Dynamic + * \tparam MaxSizeAtCompileTime the dimension of the matrix, or Dynamic. This parameter is optional and defaults + * to SizeAtCompileTime. Most of the time, you do not need to specify it. 
+ * + * \sa class DiagonalBase, class DiagonalWrapper + */ + +namespace internal { +template +struct traits> + : traits> { + typedef Matrix DiagonalVectorType; + typedef DiagonalShape StorageKind; + enum { Flags = LvalueBit | NoPreferredStorageOrderBit | NestByRefBit }; +}; +} // namespace internal +template +class DiagonalMatrix : public DiagonalBase> { + public: +#ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename internal::traits::DiagonalVectorType DiagonalVectorType; + typedef const DiagonalMatrix& Nested; + typedef Scalar_ Scalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; +#endif + + protected: + DiagonalVectorType m_diagonal; + + public: + /** const version of diagonal(). */ + EIGEN_DEVICE_FUNC inline const DiagonalVectorType& diagonal() const { return m_diagonal; } + /** \returns a reference to the stored vector of diagonal coefficients. */ + EIGEN_DEVICE_FUNC inline DiagonalVectorType& diagonal() { return m_diagonal; } + + /** Default constructor without initialization */ + EIGEN_DEVICE_FUNC inline DiagonalMatrix() {} + + /** Constructs a diagonal matrix with given dimension */ + EIGEN_DEVICE_FUNC explicit inline DiagonalMatrix(Index dim) : m_diagonal(dim) {} + + /** 2D constructor. */ + EIGEN_DEVICE_FUNC inline DiagonalMatrix(const Scalar& x, const Scalar& y) : m_diagonal(x, y) {} + + /** 3D constructor. */ + EIGEN_DEVICE_FUNC inline DiagonalMatrix(const Scalar& x, const Scalar& y, const Scalar& z) : m_diagonal(x, y, z) {} + + /** \brief Construct a diagonal matrix with fixed size from an arbitrary number of coefficients. + * + * \warning To construct a diagonal matrix of fixed size, the number of values passed to this + * constructor must match the fixed dimension of \c *this. 
+ * + * \sa DiagonalMatrix(const Scalar&, const Scalar&) + * \sa DiagonalMatrix(const Scalar&, const Scalar&, const Scalar&) + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DiagonalMatrix(const Scalar& a0, const Scalar& a1, const Scalar& a2, + const ArgTypes&... args) + : m_diagonal(a0, a1, a2, args...) {} + + /** \brief Constructs a DiagonalMatrix and initializes it by elements given by an initializer list of initializer + * lists \cpp11 + */ + EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE DiagonalMatrix( + const std::initializer_list>& list) + : m_diagonal(list) {} + + /** \brief Constructs a DiagonalMatrix from an r-value diagonal vector type */ + EIGEN_DEVICE_FUNC explicit inline DiagonalMatrix(DiagonalVectorType&& diag) : m_diagonal(std::move(diag)) {} + + /** Copy constructor. */ + template + EIGEN_DEVICE_FUNC inline DiagonalMatrix(const DiagonalBase& other) : m_diagonal(other.diagonal()) {} + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** copy constructor. prevent a default copy constructor from hiding the other templated constructor */ + inline DiagonalMatrix(const DiagonalMatrix& other) : m_diagonal(other.diagonal()) {} +#endif + + /** generic constructor from expression of the diagonal coefficients */ + template + EIGEN_DEVICE_FUNC explicit inline DiagonalMatrix(const MatrixBase& other) : m_diagonal(other) {} + + /** Copy operator. */ + template + EIGEN_DEVICE_FUNC DiagonalMatrix& operator=(const DiagonalBase& other) { + m_diagonal = other.diagonal(); + return *this; + } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. 
+ */ + EIGEN_DEVICE_FUNC DiagonalMatrix& operator=(const DiagonalMatrix& other) { + m_diagonal = other.diagonal(); + return *this; + } +#endif + + typedef DiagonalWrapper, DiagonalVectorType>> + InitializeReturnType; + + typedef DiagonalWrapper, DiagonalVectorType>> + ZeroInitializeReturnType; + + /** Initializes a diagonal matrix of size SizeAtCompileTime with coefficients set to zero */ + EIGEN_DEVICE_FUNC static const ZeroInitializeReturnType Zero() { return DiagonalVectorType::Zero().asDiagonal(); } + /** Initializes a diagonal matrix of size dim with coefficients set to zero */ + EIGEN_DEVICE_FUNC static const ZeroInitializeReturnType Zero(Index size) { + return DiagonalVectorType::Zero(size).asDiagonal(); + } + /** Initializes a identity matrix of size SizeAtCompileTime */ + EIGEN_DEVICE_FUNC static const InitializeReturnType Identity() { return DiagonalVectorType::Ones().asDiagonal(); } + /** Initializes a identity matrix of size dim */ + EIGEN_DEVICE_FUNC static const InitializeReturnType Identity(Index size) { + return DiagonalVectorType::Ones(size).asDiagonal(); + } + + /** Resizes to given size. */ + EIGEN_DEVICE_FUNC inline void resize(Index size) { m_diagonal.resize(size); } + /** Sets all coefficients to zero. */ + EIGEN_DEVICE_FUNC inline void setZero() { m_diagonal.setZero(); } + /** Resizes and sets all coefficients to zero. */ + EIGEN_DEVICE_FUNC inline void setZero(Index size) { m_diagonal.setZero(size); } + /** Sets this matrix to be the identity matrix of the current size. */ + EIGEN_DEVICE_FUNC inline void setIdentity() { m_diagonal.setOnes(); } + /** Sets this matrix to be the identity matrix of the given size. 
*/ + EIGEN_DEVICE_FUNC inline void setIdentity(Index size) { m_diagonal.setOnes(size); } +}; + +/** \class DiagonalWrapper + * \ingroup Core_Module + * + * \brief Expression of a diagonal matrix + * + * \tparam DiagonalVectorType_ the type of the vector of diagonal coefficients + * + * This class is an expression of a diagonal matrix, but not storing its own vector of diagonal coefficients, + * instead wrapping an existing vector expression. It is the return type of MatrixBase::asDiagonal() + * and most of the time this is the only way that it is used. + * + * \sa class DiagonalMatrix, class DiagonalBase, MatrixBase::asDiagonal() + */ + +namespace internal { +template +struct traits> { + typedef DiagonalVectorType_ DiagonalVectorType; + typedef typename DiagonalVectorType::Scalar Scalar; + typedef typename DiagonalVectorType::StorageIndex StorageIndex; + typedef DiagonalShape StorageKind; + typedef typename traits::XprKind XprKind; + enum { + RowsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, + ColsAtCompileTime = DiagonalVectorType::SizeAtCompileTime, + MaxRowsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = DiagonalVectorType::MaxSizeAtCompileTime, + Flags = (traits::Flags & LvalueBit) | NoPreferredStorageOrderBit + }; +}; +} // namespace internal + +template +class DiagonalWrapper : public DiagonalBase>, internal::no_assignment_operator { + public: +#ifndef EIGEN_PARSED_BY_DOXYGEN + typedef DiagonalVectorType_ DiagonalVectorType; + typedef DiagonalWrapper Nested; +#endif + + /** Constructor from expression of diagonal coefficients to wrap. */ + EIGEN_DEVICE_FUNC explicit inline DiagonalWrapper(DiagonalVectorType& a_diagonal) : m_diagonal(a_diagonal) {} + + /** \returns a const reference to the wrapped expression of diagonal coefficients. 
*/ + EIGEN_DEVICE_FUNC const DiagonalVectorType& diagonal() const { return m_diagonal; } + + protected: + typename DiagonalVectorType::Nested m_diagonal; +}; + +/** \returns a pseudo-expression of a diagonal matrix with *this as vector of diagonal coefficients + * + * \only_for_vectors + * + * Example: \include MatrixBase_asDiagonal.cpp + * Output: \verbinclude MatrixBase_asDiagonal.out + * + * \sa class DiagonalWrapper, class DiagonalMatrix, diagonal(), isDiagonal() + **/ +template +EIGEN_DEVICE_FUNC inline const DiagonalWrapper MatrixBase::asDiagonal() const { + return DiagonalWrapper(derived()); +} + +/** \returns true if *this is approximately equal to a diagonal matrix, + * within the precision given by \a prec. + * + * Example: \include MatrixBase_isDiagonal.cpp + * Output: \verbinclude MatrixBase_isDiagonal.out + * + * \sa asDiagonal() + */ +template +bool MatrixBase::isDiagonal(const RealScalar& prec) const { + if (cols() != rows()) return false; + RealScalar maxAbsOnDiagonal = static_cast(-1); + for (Index j = 0; j < cols(); ++j) { + RealScalar absOnDiagonal = numext::abs(coeff(j, j)); + if (absOnDiagonal > maxAbsOnDiagonal) maxAbsOnDiagonal = absOnDiagonal; + } + for (Index j = 0; j < cols(); ++j) + for (Index i = 0; i < j; ++i) { + if (!internal::isMuchSmallerThan(coeff(i, j), maxAbsOnDiagonal, prec)) return false; + if (!internal::isMuchSmallerThan(coeff(j, i), maxAbsOnDiagonal, prec)) return false; + } + return true; +} + +namespace internal { + +template <> +struct storage_kind_to_shape { + typedef DiagonalShape Shape; +}; + +struct Diagonal2Dense {}; + +template <> +struct AssignmentKind { + typedef Diagonal2Dense Kind; +}; + +// Diagonal matrix to Dense assignment +template +struct Assignment { + static EIGEN_DEVICE_FUNC void run( + DstXprType& dst, const SrcXprType& src, + const internal::assign_op& /*func*/) { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, 
dstCols); + + dst.setZero(); + dst.diagonal() = src.diagonal(); + } + + static EIGEN_DEVICE_FUNC void run( + DstXprType& dst, const SrcXprType& src, + const internal::add_assign_op& /*func*/) { + dst.diagonal() += src.diagonal(); + } + + static EIGEN_DEVICE_FUNC void run( + DstXprType& dst, const SrcXprType& src, + const internal::sub_assign_op& /*func*/) { + dst.diagonal() -= src.diagonal(); + } +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_DIAGONALMATRIX_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/DiagonalProduct.h b/o-voxel/third_party/eigen/Eigen/src/Core/DiagonalProduct.h new file mode 100644 index 0000000000000000000000000000000000000000..aece50f0f68c9e20adc17fa708a46463de3c645b --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/DiagonalProduct.h @@ -0,0 +1,30 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2007-2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DIAGONALPRODUCT_H +#define EIGEN_DIAGONALPRODUCT_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +/** \returns the diagonal matrix product of \c *this by the diagonal matrix \a diagonal. 
+ */ +template +template +EIGEN_DEVICE_FUNC inline const Product MatrixBase::operator*( + const DiagonalBase &a_diagonal) const { + return Product(derived(), a_diagonal.derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_DIAGONALPRODUCT_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Dot.h b/o-voxel/third_party/eigen/Eigen/src/Core/Dot.h new file mode 100644 index 0000000000000000000000000000000000000000..22aab128e30079d01ed99ff04393742268b60158 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Dot.h @@ -0,0 +1,265 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008, 2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_DOT_H +#define EIGEN_DOT_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template ::Scalar> +struct squared_norm_impl { + using Real = typename NumTraits::Real; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Real run(const Derived& a) { return a.realView().cwiseAbs2().sum(); } +}; + +template +struct squared_norm_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE bool run(const Derived& a) { return a.any(); } +}; + +} // end namespace internal + +/** \fn MatrixBase::dot + * \returns the dot product of *this with other. + * + * \only_for_vectors + * + * \note If the scalar type is complex numbers, then this function returns the hermitian + * (sesquilinear) dot product, conjugate-linear in the first variable and linear in the + * second variable. 
+ * + * \sa squaredNorm(), norm() + */ +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE + typename ScalarBinaryOpTraits::Scalar, + typename internal::traits::Scalar>::ReturnType + MatrixBase::dot(const MatrixBase& other) const { + return internal::dot_impl::run(derived(), other.derived()); +} + +//---------- implementation of L2 norm and related functions ---------- + +/** \returns, for vectors, the squared \em l2 norm of \c *this, and for matrices the squared Frobenius norm. + * In both cases, it consists in the sum of the square of all the matrix entries. + * For vectors, this is also equals to the dot product of \c *this with itself. + * + * \sa dot(), norm(), lpNorm() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real +MatrixBase::squaredNorm() const { + return internal::squared_norm_impl::run(derived()); +} + +/** \returns, for vectors, the \em l2 norm of \c *this, and for matrices the Frobenius norm. + * In both cases, it consists in the square root of the sum of the square of all the matrix entries. + * For vectors, this is also equals to the square root of the dot product of \c *this with itself. + * + * \sa lpNorm(), dot(), squaredNorm() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename NumTraits::Scalar>::Real +MatrixBase::norm() const { + return numext::sqrt(squaredNorm()); +} + +/** \returns an expression of the quotient of \c *this by its own norm. + * + * \warning If the input vector is too small (i.e., this->norm()==0), + * then this function returns a copy of the input. 
+ * + * \only_for_vectors + * + * \sa norm(), normalize() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::PlainObject MatrixBase::normalized() + const { + typedef typename internal::nested_eval::type Nested_; + Nested_ n(derived()); + RealScalar z = n.squaredNorm(); + // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU + if (z > RealScalar(0)) + return n / numext::sqrt(z); + else + return n; +} + +/** Normalizes the vector, i.e. divides it by its own norm. + * + * \only_for_vectors + * + * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged. + * + * \sa norm(), normalized() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void MatrixBase::normalize() { + RealScalar z = squaredNorm(); + // NOTE: after extensive benchmarking, this conditional does not impact performance, at least on recent x86 CPU + if (z > RealScalar(0)) derived() /= numext::sqrt(z); +} + +/** \returns an expression of the quotient of \c *this by its own norm while avoiding underflow and overflow. + * + * \only_for_vectors + * + * This method is analogue to the normalized() method, but it reduces the risk of + * underflow and overflow when computing the norm. + * + * \warning If the input vector is too small (i.e., this->norm()==0), + * then this function returns a copy of the input. 
+ * + * \sa stableNorm(), stableNormalize(), normalized() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename MatrixBase::PlainObject +MatrixBase::stableNormalized() const { + typedef typename internal::nested_eval::type Nested_; + Nested_ n(derived()); + RealScalar w = n.cwiseAbs().maxCoeff(); + RealScalar z = (n / w).squaredNorm(); + if (z > RealScalar(0)) + return n / (numext::sqrt(z) * w); + else + return n; +} + +/** Normalizes the vector while avoid underflow and overflow + * + * \only_for_vectors + * + * This method is analogue to the normalize() method, but it reduces the risk of + * underflow and overflow when computing the norm. + * + * \warning If the input vector is too small (i.e., this->norm()==0), then \c *this is left unchanged. + * + * \sa stableNorm(), stableNormalized(), normalize() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void MatrixBase::stableNormalize() { + RealScalar w = cwiseAbs().maxCoeff(); + RealScalar z = (derived() / w).squaredNorm(); + if (z > RealScalar(0)) derived() /= numext::sqrt(z) * w; +} + +//---------- implementation of other norms ---------- + +namespace internal { + +template +struct lpNorm_selector { + typedef typename NumTraits::Scalar>::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const MatrixBase& m) { + EIGEN_USING_STD(pow) + return pow(m.cwiseAbs().array().pow(p).sum(), RealScalar(1) / p); + } +}; + +template +struct lpNorm_selector { + EIGEN_DEVICE_FUNC static inline typename NumTraits::Scalar>::Real run( + const MatrixBase& m) { + return m.cwiseAbs().sum(); + } +}; + +template +struct lpNorm_selector { + EIGEN_DEVICE_FUNC static inline typename NumTraits::Scalar>::Real run( + const MatrixBase& m) { + return m.norm(); + } +}; + +template +struct lpNorm_selector { + typedef typename NumTraits::Scalar>::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const MatrixBase& m) { + if (Derived::SizeAtCompileTime == 0 || (Derived::SizeAtCompileTime == Dynamic 
&& m.size() == 0)) + return RealScalar(0); + return m.cwiseAbs().maxCoeff(); + } +}; + +} // end namespace internal + +/** \returns the \b coefficient-wise \f$ \ell^p \f$ norm of \c *this, that is, returns the p-th root of the sum of the + * p-th powers of the absolute values of the coefficients of \c *this. If \a p is the special value \a Eigen::Infinity, + * this function returns the \f$ \ell^\infty \f$ norm, that is the maximum of the absolute values of the coefficients of + * \c *this. + * + * In all cases, if \c *this is empty, then the value 0 is returned. + * + * \note For matrices, this function does not compute the operator-norm. That is, if \c *this is a matrix, then its + * coefficients are interpreted as a 1D vector. Nonetheless, you can easily compute the 1-norm and \f$\infty\f$-norm + * matrix operator norms using \link TutorialReductionsVisitorsBroadcastingReductionsNorm partial reductions \endlink. + * + * \sa norm() + */ +template +template +#ifndef EIGEN_PARSED_BY_DOXYGEN +EIGEN_DEVICE_FUNC inline typename NumTraits::Scalar>::Real +#else +EIGEN_DEVICE_FUNC MatrixBase::RealScalar +#endif +MatrixBase::lpNorm() const { + return internal::lpNorm_selector::run(*this); +} + +//---------- implementation of isOrthogonal / isUnitary ---------- + +/** \returns true if *this is approximately orthogonal to \a other, + * within the precision given by \a prec. + * + * Example: \include MatrixBase_isOrthogonal.cpp + * Output: \verbinclude MatrixBase_isOrthogonal.out + */ +template +template +bool MatrixBase::isOrthogonal(const MatrixBase& other, const RealScalar& prec) const { + typename internal::nested_eval::type nested(derived()); + typename internal::nested_eval::type otherNested(other.derived()); + return numext::abs2(nested.dot(otherNested)) <= prec * prec * nested.squaredNorm() * otherNested.squaredNorm(); +} + +/** \returns true if *this is approximately an unitary matrix, + * within the precision given by \a prec. 
In the case where the \a Scalar + * type is real numbers, a unitary matrix is an orthogonal matrix, whence the name. + * + * \note This can be used to check whether a family of vectors forms an orthonormal basis. + * Indeed, \c m.isUnitary() returns true if and only if the columns (equivalently, the rows) of m form an + * orthonormal basis. + * + * Example: \include MatrixBase_isUnitary.cpp + * Output: \verbinclude MatrixBase_isUnitary.out + */ +template +bool MatrixBase::isUnitary(const RealScalar& prec) const { + typename internal::nested_eval::type self(derived()); + for (Index i = 0; i < cols(); ++i) { + if (!internal::isApprox(self.col(i).squaredNorm(), static_cast(1), prec)) return false; + for (Index j = 0; j < i; ++j) + if (!internal::isMuchSmallerThan(self.col(i).dot(self.col(j)), static_cast(1), prec)) return false; + } + return true; +} + +} // end namespace Eigen + +#endif // EIGEN_DOT_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/EigenBase.h b/o-voxel/third_party/eigen/Eigen/src/Core/EigenBase.h new file mode 100644 index 0000000000000000000000000000000000000000..6d69eccc4989e2922a07c262c37bbab1647d1c18 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/EigenBase.h @@ -0,0 +1,149 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Benoit Jacob +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_EIGENBASE_H +#define EIGEN_EIGENBASE_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +/** \class EigenBase + * \ingroup Core_Module + * + * Common base class for all classes T such that MatrixBase has an operator=(T) and a constructor MatrixBase(T). 
+ * + * In other words, an EigenBase object is an object that can be copied into a MatrixBase. + * + * Besides MatrixBase-derived classes, this also includes special matrix classes such as diagonal matrices, etc. + * + * Notice that this class is trivial, it is only used to disambiguate overloaded functions. + * + * \sa \blank \ref TopicClassHierarchy + */ +template +struct EigenBase { + // typedef typename internal::plain_matrix_type::type PlainObject; + + /** \brief The interface type of indices + * \details To change this, \c \#define the preprocessor symbol \c EIGEN_DEFAULT_DENSE_INDEX_TYPE. + * \sa StorageIndex, \ref TopicPreprocessorDirectives. + * DEPRECATED: Since Eigen 3.3, its usage is deprecated. Use Eigen::Index instead. + * Deprecation is not marked with a doxygen comment because there are too many existing usages to add the deprecation + * attribute. + */ + typedef Eigen::Index Index; + + // FIXME is it needed? + typedef typename internal::traits::StorageKind StorageKind; + + /** \returns a reference to the derived object */ + EIGEN_DEVICE_FUNC constexpr Derived& derived() { return *static_cast(this); } + /** \returns a const reference to the derived object */ + EIGEN_DEVICE_FUNC constexpr const Derived& derived() const { return *static_cast(this); } + + EIGEN_DEVICE_FUNC inline constexpr Derived& const_cast_derived() const { + return *static_cast(const_cast(this)); + } + EIGEN_DEVICE_FUNC inline const Derived& const_derived() const { return *static_cast(this); } + + /** \returns the number of rows. \sa cols(), RowsAtCompileTime */ + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return derived().rows(); } + /** \returns the number of columns. \sa rows(), ColsAtCompileTime*/ + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return derived().cols(); } + /** \returns the number of coefficients, which is rows()*cols(). + * \sa rows(), cols(), SizeAtCompileTime. 
*/ + EIGEN_DEVICE_FUNC constexpr Index size() const noexcept { return rows() * cols(); } + + /** \internal Don't use it, but do the equivalent: \code dst = *this; \endcode */ + template + EIGEN_DEVICE_FUNC inline void evalTo(Dest& dst) const { + derived().evalTo(dst); + } + + /** \internal Don't use it, but do the equivalent: \code dst += *this; \endcode */ + template + EIGEN_DEVICE_FUNC inline void addTo(Dest& dst) const { + // This is the default implementation, + // derived class can reimplement it in a more optimized way. + typename Dest::PlainObject res(rows(), cols()); + evalTo(res); + dst += res; + } + + /** \internal Don't use it, but do the equivalent: \code dst -= *this; \endcode */ + template + EIGEN_DEVICE_FUNC inline void subTo(Dest& dst) const { + // This is the default implementation, + // derived class can reimplement it in a more optimized way. + typename Dest::PlainObject res(rows(), cols()); + evalTo(res); + dst -= res; + } + + /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheRight(*this); \endcode */ + template + EIGEN_DEVICE_FUNC inline void applyThisOnTheRight(Dest& dst) const { + // This is the default implementation, + // derived class can reimplement it in a more optimized way. + dst = dst * this->derived(); + } + + /** \internal Don't use it, but do the equivalent: \code dst.applyOnTheLeft(*this); \endcode */ + template + EIGEN_DEVICE_FUNC inline void applyThisOnTheLeft(Dest& dst) const { + // This is the default implementation, + // derived class can reimplement it in a more optimized way. 
+ dst = this->derived() * dst; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DeviceWrapper device(Device& device); + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE DeviceWrapper device(Device& device) const; +}; + +/*************************************************************************** + * Implementation of matrix base methods + ***************************************************************************/ + +/** \brief Copies the generic expression \a other into *this. + * + * \details The expression must provide a (templated) evalTo(Derived& dst) const + * function which does the actual job. In practice, this allows any user to write + * its own special matrix without having to modify MatrixBase + * + * \returns a reference to *this. + */ +template +template +EIGEN_DEVICE_FUNC Derived& DenseBase::operator=(const EigenBase& other) { + call_assignment(derived(), other.derived()); + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC Derived& DenseBase::operator+=(const EigenBase& other) { + call_assignment(derived(), other.derived(), internal::add_assign_op()); + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC Derived& DenseBase::operator-=(const EigenBase& other) { + call_assignment(derived(), other.derived(), internal::sub_assign_op()); + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_EIGENBASE_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Fill.h b/o-voxel/third_party/eigen/Eigen/src/Core/Fill.h new file mode 100644 index 0000000000000000000000000000000000000000..df78f1a96057af9c56070aa127f99dff4ba7de88 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Fill.h @@ -0,0 +1,138 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2024 Charles Schlosser +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_FILL_H +#define EIGEN_FILL_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template +struct eigen_fill_helper : std::false_type {}; + +template +struct eigen_fill_helper> : std::true_type {}; + +template +struct eigen_fill_helper> : std::true_type {}; + +template +struct eigen_fill_helper> : eigen_fill_helper {}; + +template +struct eigen_fill_helper> + : std::integral_constant::value && + (Xpr::IsRowMajor ? (BlockRows == 1) : (BlockCols == 1))> {}; + +template +struct eigen_fill_helper>> : eigen_fill_helper {}; + +template +struct eigen_fill_helper>> + : std::integral_constant::value && + enum_eq_not_dynamic(OuterStride_, Xpr::InnerSizeAtCompileTime)> {}; + +template +struct eigen_fill_helper>> + : eigen_fill_helper>> {}; + +template +struct eigen_fill_helper>> + : eigen_fill_helper>> {}; + +template +struct eigen_fill_helper>> + : eigen_fill_helper>> {}; + +template +struct eigen_fill_impl { + using Scalar = typename Xpr::Scalar; + using Func = scalar_constant_op; + using PlainObject = typename Xpr::PlainObject; + using Constant = typename PlainObject::ConstantReturnType; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst, const Scalar& val) { + const Constant src(dst.rows(), dst.cols(), val); + run(dst, src); + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst, const SrcXpr& src) { + call_dense_assignment_loop(dst, src, assign_op()); + } +}; + +#if EIGEN_COMP_MSVC || defined(EIGEN_GPU_COMPILE_PHASE) +template +struct eigen_fill_impl : eigen_fill_impl {}; +#else +template +struct eigen_fill_impl { + using Scalar = typename Xpr::Scalar; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const Scalar& val) { + const Scalar val_copy = val; + using std::fill_n; + fill_n(dst.data(), dst.size(), 
val_copy); + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const SrcXpr& src) { + resize_if_allowed(dst, src, assign_op()); + const Scalar& val = src.functor()(); + run(dst, val); + } +}; +#endif + +template +struct eigen_memset_helper { + static constexpr bool value = + std::is_trivially_copyable::value && eigen_fill_helper::value; +}; + +template +struct eigen_zero_impl { + using Scalar = typename Xpr::Scalar; + using PlainObject = typename Xpr::PlainObject; + using Zero = typename PlainObject::ZeroReturnType; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst) { + const Zero src(dst.rows(), dst.cols()); + run(dst, src); + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void run(Xpr& dst, const SrcXpr& src) { + call_dense_assignment_loop(dst, src, assign_op()); + } +}; + +template +struct eigen_zero_impl { + using Scalar = typename Xpr::Scalar; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst) { + const std::ptrdiff_t num_bytes = dst.size() * static_cast(sizeof(Scalar)); + if (num_bytes <= 0) return; + void* dst_ptr = static_cast(dst.data()); +#ifndef EIGEN_NO_DEBUG + eigen_assert((dst_ptr != nullptr) && "null pointer dereference error!"); +#endif + EIGEN_USING_STD(memset); + memset(dst_ptr, 0, static_cast(num_bytes)); + } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Xpr& dst, const SrcXpr& src) { + resize_if_allowed(dst, src, assign_op()); + run(dst); + } +}; + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_FILL_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/FindCoeff.h b/o-voxel/third_party/eigen/Eigen/src/Core/FindCoeff.h new file mode 100644 index 0000000000000000000000000000000000000000..d303e10f1e79b7fee36a35a6e043880f50f879b1 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/FindCoeff.h @@ -0,0 +1,464 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2025 Charlie Schlosser +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_FIND_COEFF_H +#define EIGEN_FIND_COEFF_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template ::IsInteger> +struct max_coeff_functor { + EIGEN_DEVICE_FUNC inline bool compareCoeff(const Scalar& incumbent, const Scalar& candidate) const { + return candidate > incumbent; + } + template + EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) const { + return pcmp_lt(incumbent, candidate); + } + template + EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& a) const { + return predux_max(a); + } +}; + +template +struct max_coeff_functor { + EIGEN_DEVICE_FUNC inline Scalar compareCoeff(const Scalar& incumbent, const Scalar& candidate) { + return (candidate > incumbent) || ((candidate != candidate) && (incumbent == incumbent)); + } + template + EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) { + return pandnot(pcmp_lt_or_nan(incumbent, candidate), pisnan(incumbent)); + } + template + EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& a) const { + return predux_max(a); + } +}; + +template +struct max_coeff_functor { + EIGEN_DEVICE_FUNC inline bool compareCoeff(const Scalar& incumbent, const Scalar& candidate) const { + return (candidate > incumbent) || ((candidate == candidate) && (incumbent != incumbent)); + } + template + EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) const { + return pandnot(pcmp_lt_or_nan(incumbent, candidate), pisnan(candidate)); + } + template + EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& a) const { + return predux_max(a); + } +}; + +template ::IsInteger> +struct 
min_coeff_functor { + EIGEN_DEVICE_FUNC inline bool compareCoeff(const Scalar& incumbent, const Scalar& candidate) const { + return candidate < incumbent; + } + template + EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) const { + return pcmp_lt(candidate, incumbent); + } + template + EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& a) const { + return predux_min(a); + } +}; + +template +struct min_coeff_functor { + EIGEN_DEVICE_FUNC inline Scalar compareCoeff(const Scalar& incumbent, const Scalar& candidate) { + return (candidate < incumbent) || ((candidate != candidate) && (incumbent == incumbent)); + } + template + EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) { + return pandnot(pcmp_lt_or_nan(candidate, incumbent), pisnan(incumbent)); + } + template + EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& a) const { + return predux_min(a); + } +}; + +template +struct min_coeff_functor { + EIGEN_DEVICE_FUNC inline bool compareCoeff(const Scalar& incumbent, const Scalar& candidate) const { + return (candidate < incumbent) || ((candidate == candidate) && (incumbent != incumbent)); + } + template + EIGEN_DEVICE_FUNC inline Packet comparePacket(const Packet& incumbent, const Packet& candidate) const { + return pandnot(pcmp_lt_or_nan(candidate, incumbent), pisnan(candidate)); + } + template + EIGEN_DEVICE_FUNC inline Scalar predux(const Packet& a) const { + return predux_min(a); + } +}; + +template +struct min_max_traits { + static constexpr bool PacketAccess = packet_traits::Vectorizable; +}; +template +struct functor_traits> : min_max_traits {}; +template +struct functor_traits> : min_max_traits {}; + +template +struct find_coeff_loop; +template +struct find_coeff_loop { + using Scalar = typename Evaluator::Scalar; + static EIGEN_DEVICE_FUNC inline void run(const Evaluator& eval, Func& func, Scalar& res, Index& outer, Index& inner) { + Index outerSize = 
eval.outerSize(); + Index innerSize = eval.innerSize(); + + /* initialization performed in calling function */ + /* result = eval.coeff(0, 0); */ + /* outer = 0; */ + /* inner = 0; */ + + for (Index j = 0; j < outerSize; j++) { + for (Index i = 0; i < innerSize; i++) { + Scalar xprCoeff = eval.coeffByOuterInner(j, i); + bool newRes = func.compareCoeff(res, xprCoeff); + if (newRes) { + outer = j; + inner = i; + res = xprCoeff; + } + } + } + } +}; +template +struct find_coeff_loop { + using Scalar = typename Evaluator::Scalar; + static EIGEN_DEVICE_FUNC inline void run(const Evaluator& eval, Func& func, Scalar& res, Index& index) { + Index size = eval.size(); + + /* initialization performed in calling function */ + /* result = eval.coeff(0); */ + /* index = 0; */ + + for (Index k = 0; k < size; k++) { + Scalar xprCoeff = eval.coeff(k); + bool newRes = func.compareCoeff(res, xprCoeff); + if (newRes) { + index = k; + res = xprCoeff; + } + } + } +}; +template +struct find_coeff_loop { + using ScalarImpl = find_coeff_loop; + using Scalar = typename Evaluator::Scalar; + using Packet = typename Evaluator::Packet; + static constexpr int PacketSize = unpacket_traits::size; + static EIGEN_DEVICE_FUNC inline void run(const Evaluator& eval, Func& func, Scalar& result, Index& outer, + Index& inner) { + Index outerSize = eval.outerSize(); + Index innerSize = eval.innerSize(); + Index packetEnd = numext::round_down(innerSize, PacketSize); + + /* initialization performed in calling function */ + /* result = eval.coeff(0, 0); */ + /* outer = 0; */ + /* inner = 0; */ + + bool checkPacket = false; + + for (Index j = 0; j < outerSize; j++) { + Packet resultPacket = pset1(result); + for (Index i = 0; i < packetEnd; i += PacketSize) { + Packet xprPacket = eval.template packetByOuterInner(j, i); + if (predux_any(func.comparePacket(resultPacket, xprPacket))) { + outer = j; + inner = i; + result = func.predux(xprPacket); + resultPacket = pset1(result); + checkPacket = true; + } + } + + for 
(Index i = packetEnd; i < innerSize; i++) { + Scalar xprCoeff = eval.coeffByOuterInner(j, i); + if (func.compareCoeff(result, xprCoeff)) { + outer = j; + inner = i; + result = xprCoeff; + checkPacket = false; + } + } + } + + if (checkPacket) { + result = eval.coeffByOuterInner(outer, inner); + Index i_end = inner + PacketSize; + for (Index i = inner; i < i_end; i++) { + Scalar xprCoeff = eval.coeffByOuterInner(outer, i); + if (func.compareCoeff(result, xprCoeff)) { + inner = i; + result = xprCoeff; + } + } + } + } +}; +template +struct find_coeff_loop { + using ScalarImpl = find_coeff_loop; + using Scalar = typename Evaluator::Scalar; + using Packet = typename Evaluator::Packet; + static constexpr int PacketSize = unpacket_traits::size; + static constexpr int Alignment = Evaluator::Alignment; + + static EIGEN_DEVICE_FUNC inline void run(const Evaluator& eval, Func& func, Scalar& result, Index& index) { + Index size = eval.size(); + Index packetEnd = numext::round_down(size, PacketSize); + + /* initialization performed in calling function */ + /* result = eval.coeff(0); */ + /* index = 0; */ + + Packet resultPacket = pset1(result); + bool checkPacket = false; + + for (Index k = 0; k < packetEnd; k += PacketSize) { + Packet xprPacket = eval.template packet(k); + if (predux_any(func.comparePacket(resultPacket, xprPacket))) { + index = k; + result = func.predux(xprPacket); + resultPacket = pset1(result); + checkPacket = true; + } + } + + for (Index k = packetEnd; k < size; k++) { + Scalar xprCoeff = eval.coeff(k); + if (func.compareCoeff(result, xprCoeff)) { + index = k; + result = xprCoeff; + checkPacket = false; + } + } + + if (checkPacket) { + result = eval.coeff(index); + Index k_end = index + PacketSize; + for (Index k = index; k < k_end; k++) { + Scalar xprCoeff = eval.coeff(k); + if (func.compareCoeff(result, xprCoeff)) { + index = k; + result = xprCoeff; + } + } + } + } +}; + +template +struct find_coeff_evaluator : public evaluator { + using Base = evaluator; 
+ using Scalar = typename Derived::Scalar; + using Packet = typename packet_traits::type; + static constexpr int Flags = Base::Flags; + static constexpr bool IsRowMajor = bool(Flags & RowMajorBit); + EIGEN_DEVICE_FUNC inline find_coeff_evaluator(const Derived& xpr) : Base(xpr), m_xpr(xpr) {} + + EIGEN_DEVICE_FUNC inline Scalar coeffByOuterInner(Index outer, Index inner) const { + Index row = IsRowMajor ? outer : inner; + Index col = IsRowMajor ? inner : outer; + return Base::coeff(row, col); + } + template + EIGEN_DEVICE_FUNC inline PacketType packetByOuterInner(Index outer, Index inner) const { + Index row = IsRowMajor ? outer : inner; + Index col = IsRowMajor ? inner : outer; + return Base::template packet(row, col); + } + + EIGEN_DEVICE_FUNC inline Index innerSize() const { return m_xpr.innerSize(); } + EIGEN_DEVICE_FUNC inline Index outerSize() const { return m_xpr.outerSize(); } + EIGEN_DEVICE_FUNC inline Index size() const { return m_xpr.size(); } + + const Derived& m_xpr; +}; + +template +struct find_coeff_impl { + using Evaluator = find_coeff_evaluator; + static constexpr int Flags = Evaluator::Flags; + static constexpr int Alignment = Evaluator::Alignment; + static constexpr bool IsRowMajor = Derived::IsRowMajor; + static constexpr int MaxInnerSizeAtCompileTime = + IsRowMajor ? Derived::MaxColsAtCompileTime : Derived::MaxRowsAtCompileTime; + static constexpr int MaxSizeAtCompileTime = Derived::MaxSizeAtCompileTime; + + using Scalar = typename Derived::Scalar; + using Packet = typename Evaluator::Packet; + + static constexpr int PacketSize = unpacket_traits::size; + static constexpr bool Linearize = bool(Flags & LinearAccessBit); + static constexpr bool DontVectorize = + enum_lt_not_dynamic(Linearize ? 
MaxSizeAtCompileTime : MaxInnerSizeAtCompileTime, PacketSize); + static constexpr bool Vectorize = + !DontVectorize && bool(Flags & PacketAccessBit) && functor_traits::PacketAccess; + + using Loop = find_coeff_loop; + + template = true> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& xpr, Func& func, Scalar& res, Index& outer, + Index& inner) { + Evaluator eval(xpr); + Loop::run(eval, func, res, outer, inner); + } + template = true> + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& xpr, Func& func, Scalar& res, Index& outer, + Index& inner) { + // where possible, use the linear loop and back-calculate the outer and inner indices + Index index = 0; + run(xpr, func, res, index); + outer = index / xpr.innerSize(); + inner = index % xpr.innerSize(); + } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(const Derived& xpr, Func& func, Scalar& res, Index& index) { + Evaluator eval(xpr); + Loop::run(eval, func, res, index); + } +}; + +template +EIGEN_DEVICE_FUNC typename internal::traits::Scalar findCoeff(const DenseBase& mat, Func& func, + IndexType* rowPtr, IndexType* colPtr) { + eigen_assert(mat.rows() > 0 && mat.cols() > 0 && "you are using an empty matrix"); + using Scalar = typename DenseBase::Scalar; + using FindCoeffImpl = internal::find_coeff_impl; + Index outer = 0; + Index inner = 0; + Scalar res = mat.coeff(0, 0); + FindCoeffImpl::run(mat.derived(), func, res, outer, inner); + *rowPtr = internal::convert_index(Derived::IsRowMajor ? outer : inner); + if (colPtr) *colPtr = internal::convert_index(Derived::IsRowMajor ? 
inner : outer); + return res; +} + +template +EIGEN_DEVICE_FUNC typename internal::traits::Scalar findCoeff(const DenseBase& mat, Func& func, + IndexType* indexPtr) { + eigen_assert(mat.size() > 0 && "you are using an empty matrix"); + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + using Scalar = typename DenseBase::Scalar; + using FindCoeffImpl = internal::find_coeff_impl; + Index index = 0; + Scalar res = mat.coeff(0); + FindCoeffImpl::run(mat.derived(), func, res, index); + *indexPtr = internal::convert_index(index); + return res; +} + +} // namespace internal + +/** \fn DenseBase::minCoeff(IndexType* rowId, IndexType* colId) const + * \returns the minimum of all coefficients of *this and puts in *row and *col its location. + * + * If there are multiple coefficients with the same extreme value, the location of the first instance is returned. + * + * In case \c *this contains NaN, NaNPropagation determines the behavior: + * NaNPropagation == PropagateFast : undefined + * NaNPropagation == PropagateNaN : result is NaN + * NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN + * \warning the matrix must be not empty, otherwise an assertion is triggered. + * + * \sa DenseBase::minCoeff(Index*), DenseBase::maxCoeff(Index*,Index*), DenseBase::visit(), DenseBase::minCoeff() + */ +template +template +EIGEN_DEVICE_FUNC typename internal::traits::Scalar DenseBase::minCoeff(IndexType* rowPtr, + IndexType* colPtr) const { + using Func = internal::min_coeff_functor; + Func func; + return internal::findCoeff(derived(), func, rowPtr, colPtr); +} + +/** \returns the minimum of all coefficients of *this and puts in *index its location. + * + * If there are multiple coefficients with the same extreme value, the location of the first instance is returned. 
+ * + * In case \c *this contains NaN, NaNPropagation determines the behavior: + * NaNPropagation == PropagateFast : undefined + * NaNPropagation == PropagateNaN : result is NaN + * NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN + * \warning the matrix must be not empty, otherwise an assertion is triggered. + * + * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::visit(), + * DenseBase::minCoeff() + */ +template +template +EIGEN_DEVICE_FUNC typename internal::traits::Scalar DenseBase::minCoeff(IndexType* indexPtr) const { + using Func = internal::min_coeff_functor; + Func func; + return internal::findCoeff(derived(), func, indexPtr); +} + +/** \fn DenseBase::maxCoeff(IndexType* rowId, IndexType* colId) const + * \returns the maximum of all coefficients of *this and puts in *row and *col its location. + * + * If there are multiple coefficients with the same extreme value, the location of the first instance is returned. + * + * In case \c *this contains NaN, NaNPropagation determines the behavior: + * NaNPropagation == PropagateFast : undefined + * NaNPropagation == PropagateNaN : result is NaN + * NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN + * \warning the matrix must be not empty, otherwise an assertion is triggered. + * + * \sa DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visit(), DenseBase::maxCoeff() + */ +template +template +EIGEN_DEVICE_FUNC typename internal::traits::Scalar DenseBase::maxCoeff(IndexType* rowPtr, + IndexType* colPtr) const { + using Func = internal::max_coeff_functor; + Func func; + return internal::findCoeff(derived(), func, rowPtr, colPtr); +} + +/** \returns the maximum of all coefficients of *this and puts in *index its location. + * + * If there are multiple coefficients with the same extreme value, the location of the first instance is returned. 
+ * + * In case \c *this contains NaN, NaNPropagation determines the behavior: + * NaNPropagation == PropagateFast : undefined + * NaNPropagation == PropagateNaN : result is NaN + * NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN + * \warning the matrix must be not empty, otherwise an assertion is triggered. + * + * \sa DenseBase::maxCoeff(IndexType*,IndexType*), DenseBase::minCoeff(IndexType*,IndexType*), DenseBase::visitor(), + * DenseBase::maxCoeff() + */ +template +template +EIGEN_DEVICE_FUNC typename internal::traits::Scalar DenseBase::maxCoeff(IndexType* indexPtr) const { + using Func = internal::max_coeff_functor; + Func func; + return internal::findCoeff(derived(), func, indexPtr); +} + +} // namespace Eigen + +#endif // EIGEN_FIND_COEFF_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/ForceAlignedAccess.h b/o-voxel/third_party/eigen/Eigen/src/Core/ForceAlignedAccess.h new file mode 100644 index 0000000000000000000000000000000000000000..dded2c580574cac42f20b42516968b7b93b21fee --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/ForceAlignedAccess.h @@ -0,0 +1,127 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_FORCEALIGNEDACCESS_H +#define EIGEN_FORCEALIGNEDACCESS_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +/** \class ForceAlignedAccess + * \ingroup Core_Module + * + * \brief Enforce aligned packet loads and stores regardless of what is requested + * + * \param ExpressionType the type of the object of which we are forcing aligned packet access + * + * This class is the return type of MatrixBase::forceAlignedAccess() + * and most of the time this is the only way it is used. + * + * \sa MatrixBase::forceAlignedAccess() + */ + +namespace internal { +template +struct traits> : public traits {}; +} // namespace internal + +template +class ForceAlignedAccess : public internal::dense_xpr_base>::type { + public: + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(ForceAlignedAccess) + + EIGEN_DEVICE_FUNC explicit inline ForceAlignedAccess(const ExpressionType& matrix) : m_expression(matrix) {} + + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_expression.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_expression.cols(); } + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return m_expression.outerStride(); } + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return m_expression.innerStride(); } + + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index row, Index col) const { + return m_expression.coeff(row, col); + } + + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { + return m_expression.const_cast_derived().coeffRef(row, col); + } + + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const { return m_expression.coeff(index); } + + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { return m_expression.const_cast_derived().coeffRef(index); } + + template + inline const PacketScalar packet(Index row, Index col) const { + return m_expression.template 
packet(row, col); + } + + template + inline void writePacket(Index row, Index col, const PacketScalar& x) { + m_expression.const_cast_derived().template writePacket(row, col, x); + } + + template + inline const PacketScalar packet(Index index) const { + return m_expression.template packet(index); + } + + template + inline void writePacket(Index index, const PacketScalar& x) { + m_expression.const_cast_derived().template writePacket(index, x); + } + + EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } + + protected: + const ExpressionType& m_expression; + + private: + ForceAlignedAccess& operator=(const ForceAlignedAccess&); +}; + +/** \returns an expression of *this with forced aligned access + * \sa forceAlignedAccessIf(),class ForceAlignedAccess + */ +template +inline const ForceAlignedAccess MatrixBase::forceAlignedAccess() const { + return ForceAlignedAccess(derived()); +} + +/** \returns an expression of *this with forced aligned access + * \sa forceAlignedAccessIf(), class ForceAlignedAccess + */ +template +inline ForceAlignedAccess MatrixBase::forceAlignedAccess() { + return ForceAlignedAccess(derived()); +} + +/** \returns an expression of *this with forced aligned access if \a Enable is true. + * \sa forceAlignedAccess(), class ForceAlignedAccess + */ +template +template +inline add_const_on_value_type_t, Derived&>> +MatrixBase::forceAlignedAccessIf() const { + return derived(); // FIXME This should not work but apparently is never used +} + +/** \returns an expression of *this with forced aligned access if \a Enable is true. 
+ * \sa forceAlignedAccess(), class ForceAlignedAccess + */ +template +template +inline std::conditional_t, Derived&> MatrixBase::forceAlignedAccessIf() { + return derived(); // FIXME This should not work but apparently is never used +} + +} // end namespace Eigen + +#endif // EIGEN_FORCEALIGNEDACCESS_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Fuzzy.h b/o-voxel/third_party/eigen/Eigen/src/Core/Fuzzy.h new file mode 100644 index 0000000000000000000000000000000000000000..aec64c6c7dc98c1ed07b38d69117071582623008 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Fuzzy.h @@ -0,0 +1,132 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_FUZZY_H +#define EIGEN_FUZZY_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template ::IsInteger> +struct isApprox_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec) { + typename internal::nested_eval::type nested(x); + typename internal::nested_eval::type otherNested(y); + return (nested.matrix() - otherNested.matrix()).cwiseAbs2().sum() <= + prec * prec * numext::mini(nested.cwiseAbs2().sum(), otherNested.cwiseAbs2().sum()); + } +}; + +template +struct isApprox_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar&) { + return x.matrix() == y.matrix(); + } +}; + +template ::IsInteger> +struct isMuchSmallerThan_object_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived& y, const typename Derived::RealScalar& prec) { + return 
x.cwiseAbs2().sum() <= numext::abs2(prec) * y.cwiseAbs2().sum(); + } +}; + +template +struct isMuchSmallerThan_object_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const OtherDerived&, const typename Derived::RealScalar&) { + return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix(); + } +}; + +template ::IsInteger> +struct isMuchSmallerThan_scalar_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const typename Derived::RealScalar& y, + const typename Derived::RealScalar& prec) { + return x.cwiseAbs2().sum() <= numext::abs2(prec * y); + } +}; + +template +struct isMuchSmallerThan_scalar_selector { + EIGEN_DEVICE_FUNC static bool run(const Derived& x, const typename Derived::RealScalar&, + const typename Derived::RealScalar&) { + return x.matrix() == Derived::Zero(x.rows(), x.cols()).matrix(); + } +}; + +} // end namespace internal + +/** \returns \c true if \c *this is approximately equal to \a other, within the precision + * determined by \a prec. + * + * \note The fuzzy compares are done multiplicatively. Two vectors \f$ v \f$ and \f$ w \f$ + * are considered to be approximately equal within precision \f$ p \f$ if + * \f[ \Vert v - w \Vert \leqslant p\,\min(\Vert v\Vert, \Vert w\Vert). \f] + * For matrices, the comparison is done using the Hilbert-Schmidt norm (aka Frobenius norm + * L2 norm). + * + * \note Because of the multiplicativeness of this comparison, one can't use this function + * to check whether \c *this is approximately equal to the zero matrix or vector. + * Indeed, \c isApprox(zero) returns false unless \c *this itself is exactly the zero matrix + * or vector. If you want to test whether \c *this is zero, use internal::isMuchSmallerThan(const + * RealScalar&, RealScalar) instead. 
+ * + * \sa internal::isMuchSmallerThan(const RealScalar&, RealScalar) const + */ +template +template +EIGEN_DEVICE_FUNC bool DenseBase::isApprox(const DenseBase& other, + const RealScalar& prec) const { + return internal::isApprox_selector::run(derived(), other.derived(), prec); +} + +/** \returns \c true if the norm of \c *this is much smaller than \a other, + * within the precision determined by \a prec. + * + * \note The fuzzy compares are done multiplicatively. A vector \f$ v \f$ is + * considered to be much smaller than \f$ x \f$ within precision \f$ p \f$ if + * \f[ \Vert v \Vert \leqslant p\,\vert x\vert. \f] + * + * For matrices, the comparison is done using the Hilbert-Schmidt norm. For this reason, + * the value of the reference scalar \a other should come from the Hilbert-Schmidt norm + * of a reference matrix of same dimensions. + * + * \sa isApprox(), isMuchSmallerThan(const DenseBase&, RealScalar) const + */ +template +EIGEN_DEVICE_FUNC bool DenseBase::isMuchSmallerThan(const typename NumTraits::Real& other, + const RealScalar& prec) const { + return internal::isMuchSmallerThan_scalar_selector::run(derived(), other, prec); +} + +/** \returns \c true if the norm of \c *this is much smaller than the norm of \a other, + * within the precision determined by \a prec. + * + * \note The fuzzy compares are done multiplicatively. A vector \f$ v \f$ is + * considered to be much smaller than a vector \f$ w \f$ within precision \f$ p \f$ if + * \f[ \Vert v \Vert \leqslant p\,\Vert w\Vert. \f] + * For matrices, the comparison is done using the Hilbert-Schmidt norm. 
+ * + * \sa isApprox(), isMuchSmallerThan(const RealScalar&, RealScalar) const + */ +template +template +EIGEN_DEVICE_FUNC bool DenseBase::isMuchSmallerThan(const DenseBase& other, + const RealScalar& prec) const { + return internal::isMuchSmallerThan_object_selector::run(derived(), other.derived(), prec); +} + +} // end namespace Eigen + +#endif // EIGEN_FUZZY_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/GeneralProduct.h b/o-voxel/third_party/eigen/Eigen/src/Core/GeneralProduct.h new file mode 100644 index 0000000000000000000000000000000000000000..17e866376c5aa0297f3e4c763c051995f10212f5 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/GeneralProduct.h @@ -0,0 +1,519 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_GENERAL_PRODUCT_H +#define EIGEN_GENERAL_PRODUCT_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +enum { Large = 2, Small = 3 }; + +// Define the threshold value to fallback from the generic matrix-matrix product +// implementation (heavy) to the lightweight coeff-based product one. +// See generic_product_impl +// in products/GeneralMatrixMatrix.h for more details. +// TODO This threshold should also be used in the compile-time selector below. +#ifndef EIGEN_GEMM_TO_COEFFBASED_THRESHOLD +// This default value has been obtained on a Haswell architecture. 
+#define EIGEN_GEMM_TO_COEFFBASED_THRESHOLD 20 +#endif + +namespace internal { + +template +struct product_type_selector; + +template +struct product_size_category { + enum { +#ifndef EIGEN_GPU_COMPILE_PHASE + is_large = MaxSize == Dynamic || Size >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD || + (Size == Dynamic && MaxSize >= EIGEN_CACHEFRIENDLY_PRODUCT_THRESHOLD), +#else + is_large = 0, +#endif + value = is_large ? Large + : Size == 1 ? 1 + : Small + }; +}; + +template +struct product_type { + typedef remove_all_t Lhs_; + typedef remove_all_t Rhs_; + enum { + MaxRows = traits::MaxRowsAtCompileTime, + Rows = traits::RowsAtCompileTime, + MaxCols = traits::MaxColsAtCompileTime, + Cols = traits::ColsAtCompileTime, + MaxDepth = min_size_prefer_fixed(traits::MaxColsAtCompileTime, traits::MaxRowsAtCompileTime), + Depth = min_size_prefer_fixed(traits::ColsAtCompileTime, traits::RowsAtCompileTime) + }; + + // the splitting into different lines of code here, introducing the _select enums and the typedef below, + // is to work around an internal compiler error with gcc 4.1 and 4.2. + private: + enum { + rows_select = product_size_category::value, + cols_select = product_size_category::value, + depth_select = product_size_category::value + }; + typedef product_type_selector selector; + + public: + enum { value = selector::ret, ret = selector::ret }; +#ifdef EIGEN_DEBUG_PRODUCT + static void debug() { + EIGEN_DEBUG_VAR(Rows); + EIGEN_DEBUG_VAR(Cols); + EIGEN_DEBUG_VAR(Depth); + EIGEN_DEBUG_VAR(rows_select); + EIGEN_DEBUG_VAR(cols_select); + EIGEN_DEBUG_VAR(depth_select); + EIGEN_DEBUG_VAR(value); + } +#endif +}; + +/* The following allows to select the kind of product at compile time + * based on the three dimensions of the product. + * This is a compile time mapping from {1,Small,Large}^3 -> {product types} */ +// FIXME I'm not sure the current mapping is the ideal one. 
+template +struct product_type_selector { + enum { ret = OuterProduct }; +}; +template +struct product_type_selector { + enum { ret = LazyCoeffBasedProductMode }; +}; +template +struct product_type_selector<1, N, 1> { + enum { ret = LazyCoeffBasedProductMode }; +}; +template +struct product_type_selector<1, 1, Depth> { + enum { ret = InnerProduct }; +}; +template <> +struct product_type_selector<1, 1, 1> { + enum { ret = InnerProduct }; +}; +template <> +struct product_type_selector { + enum { ret = CoeffBasedProductMode }; +}; +template <> +struct product_type_selector<1, Small, Small> { + enum { ret = CoeffBasedProductMode }; +}; +template <> +struct product_type_selector { + enum { ret = CoeffBasedProductMode }; +}; +template <> +struct product_type_selector { + enum { ret = LazyCoeffBasedProductMode }; +}; +template <> +struct product_type_selector { + enum { ret = LazyCoeffBasedProductMode }; +}; +template <> +struct product_type_selector { + enum { ret = LazyCoeffBasedProductMode }; +}; +template <> +struct product_type_selector<1, Large, Small> { + enum { ret = CoeffBasedProductMode }; +}; +template <> +struct product_type_selector<1, Large, Large> { + enum { ret = GemvProduct }; +}; +template <> +struct product_type_selector<1, Small, Large> { + enum { ret = CoeffBasedProductMode }; +}; +template <> +struct product_type_selector { + enum { ret = CoeffBasedProductMode }; +}; +template <> +struct product_type_selector { + enum { ret = GemvProduct }; +}; +template <> +struct product_type_selector { + enum { ret = CoeffBasedProductMode }; +}; +template <> +struct product_type_selector { + enum { ret = GemmProduct }; +}; +template <> +struct product_type_selector { + enum { ret = GemmProduct }; +}; +template <> +struct product_type_selector { + enum { ret = GemmProduct }; +}; +template <> +struct product_type_selector { + enum { ret = GemmProduct }; +}; +template <> +struct product_type_selector { + enum { ret = CoeffBasedProductMode }; +}; +template <> +struct 
product_type_selector { + enum { ret = CoeffBasedProductMode }; +}; +template <> +struct product_type_selector { + enum { ret = GemmProduct }; +}; + +} // end namespace internal + +/*********************************************************************** + * Implementation of Inner Vector Vector Product + ***********************************************************************/ + +// FIXME : maybe the "inner product" could return a Scalar +// instead of a 1x1 matrix ?? +// Pro: more natural for the user +// Cons: this could be a problem if in a meta unrolled algorithm a matrix-matrix +// product ends up to a row-vector times col-vector product... To tackle this use +// case, we could have a specialization for Block with: operator=(Scalar x); + +/*********************************************************************** + * Implementation of Outer Vector Vector Product + ***********************************************************************/ + +/*********************************************************************** + * Implementation of General Matrix Vector Product + ***********************************************************************/ + +/* According to the shape/flags of the matrix we have to distinghish 3 different cases: + * 1 - the matrix is col-major, BLAS compatible and M is large => call fast BLAS-like colmajor routine + * 2 - the matrix is row-major, BLAS compatible and N is large => call fast BLAS-like rowmajor routine + * 3 - all other cases are handled using a simple loop along the outer-storage direction. + * Therefore we need a lower level meta selector. + * Furthermore, if the matrix is the rhs, then the product has to be transposed. 
+ */ +namespace internal { + +template +struct gemv_dense_selector; + +} // end namespace internal + +namespace internal { + +template +struct gemv_static_vector_if; + +template +struct gemv_static_vector_if { + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Scalar* data() { + eigen_internal_assert(false && "should never be called"); + return 0; + } +}; + +template +struct gemv_static_vector_if { + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Scalar* data() { return 0; } +}; + +template +struct gemv_static_vector_if { +#if EIGEN_MAX_STATIC_ALIGN_BYTES != 0 + internal::plain_array m_data; + EIGEN_STRONG_INLINE constexpr Scalar* data() { return m_data.array; } +#else + // Some architectures cannot align on the stack, + // => let's manually enforce alignment by allocating more data and return the address of the first aligned element. + internal::plain_array m_data; + EIGEN_STRONG_INLINE constexpr Scalar* data() { + return reinterpret_cast((std::uintptr_t(m_data.array) & ~(std::size_t(EIGEN_MAX_ALIGN_BYTES - 1))) + + EIGEN_MAX_ALIGN_BYTES); + } +#endif +}; + +// The vector is on the left => transposition +template +struct gemv_dense_selector { + template + static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) { + Transpose destT(dest); + enum { OtherStorageOrder = StorageOrder == RowMajor ? 
ColMajor : RowMajor }; + gemv_dense_selector::run(rhs.transpose(), lhs.transpose(), destT, + alpha); + } +}; + +template <> +struct gemv_dense_selector { + template + static inline void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) { + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + + typedef Map, plain_enum_min(AlignedMax, internal::packet_traits::size)> + MappedDest; + + ActualLhsType actualLhs = LhsBlasTraits::extract(lhs); + ActualRhsType actualRhs = RhsBlasTraits::extract(rhs); + + ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs); + + // make sure Dest is a compile-time vector type (bug 1166) + typedef std::conditional_t ActualDest; + + enum { + // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 + // on, the other hand it is good for the cache to pack the vector anyways... 
+ EvalToDestAtCompileTime = (ActualDest::InnerStrideAtCompileTime == 1), + ComplexByReal = (NumTraits::IsComplex) && (!NumTraits::IsComplex), + MightCannotUseDest = ((!EvalToDestAtCompileTime) || ComplexByReal) && (ActualDest::MaxSizeAtCompileTime != 0) + }; + + typedef const_blas_data_mapper LhsMapper; + typedef const_blas_data_mapper RhsMapper; + RhsScalar compatibleAlpha = get_factor::run(actualAlpha); + + if (!MightCannotUseDest) { + // shortcut if we are sure to be able to use dest directly, + // this ease the compiler to generate cleaner and more optimzized code for most common cases + general_matrix_vector_product::run(actualLhs.rows(), actualLhs.cols(), + LhsMapper(actualLhs.data(), + actualLhs.outerStride()), + RhsMapper(actualRhs.data(), + actualRhs.innerStride()), + dest.data(), 1, compatibleAlpha); + } else { + gemv_static_vector_if + static_dest; + + const bool alphaIsCompatible = (!ComplexByReal) || (numext::is_exactly_zero(numext::imag(actualAlpha))); + const bool evalToDest = EvalToDestAtCompileTime && alphaIsCompatible; + + ei_declare_aligned_stack_constructed_variable(ResScalar, actualDestPtr, dest.size(), + evalToDest ? 
dest.data() : static_dest.data()); + + if (!evalToDest) { +#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + constexpr int Size = Dest::SizeAtCompileTime; + Index size = dest.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN +#endif + if (!alphaIsCompatible) { + MappedDest(actualDestPtr, dest.size()).setZero(); + compatibleAlpha = RhsScalar(1); + } else + MappedDest(actualDestPtr, dest.size()) = dest; + } + + general_matrix_vector_product::run(actualLhs.rows(), actualLhs.cols(), + LhsMapper(actualLhs.data(), + actualLhs.outerStride()), + RhsMapper(actualRhs.data(), + actualRhs.innerStride()), + actualDestPtr, 1, compatibleAlpha); + + if (!evalToDest) { + if (!alphaIsCompatible) + dest.matrix() += actualAlpha * MappedDest(actualDestPtr, dest.size()); + else + dest = MappedDest(actualDestPtr, dest.size()); + } + } + } +}; + +template <> +struct gemv_dense_selector { + template + static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) { + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef typename Dest::Scalar ResScalar; + + typedef internal::blas_traits LhsBlasTraits; + typedef typename LhsBlasTraits::DirectLinearAccessType ActualLhsType; + typedef internal::blas_traits RhsBlasTraits; + typedef typename RhsBlasTraits::DirectLinearAccessType ActualRhsType; + typedef internal::remove_all_t ActualRhsTypeCleaned; + + std::add_const_t actualLhs = LhsBlasTraits::extract(lhs); + std::add_const_t actualRhs = RhsBlasTraits::extract(rhs); + + ResScalar actualAlpha = combine_scalar_factors(alpha, lhs, rhs); + + enum { + // FIXME find a way to allow an inner stride on the result if packet_traits::size==1 + // on, the other hand it is good for the cache to pack the vector anyways... 
+ DirectlyUseRhs = + ActualRhsTypeCleaned::InnerStrideAtCompileTime == 1 || ActualRhsTypeCleaned::MaxSizeAtCompileTime == 0 + }; + + gemv_static_vector_if + static_rhs; + + ei_declare_aligned_stack_constructed_variable( + RhsScalar, actualRhsPtr, actualRhs.size(), + DirectlyUseRhs ? const_cast(actualRhs.data()) : static_rhs.data()); + + if (!DirectlyUseRhs) { +#ifdef EIGEN_DENSE_STORAGE_CTOR_PLUGIN + constexpr int Size = ActualRhsTypeCleaned::SizeAtCompileTime; + Index size = actualRhs.size(); + EIGEN_DENSE_STORAGE_CTOR_PLUGIN +#endif + Map(actualRhsPtr, actualRhs.size()) = actualRhs; + } + + typedef const_blas_data_mapper LhsMapper; + typedef const_blas_data_mapper RhsMapper; + general_matrix_vector_product:: + run(actualLhs.rows(), actualLhs.cols(), LhsMapper(actualLhs.data(), actualLhs.outerStride()), + RhsMapper(actualRhsPtr, 1), dest.data(), + dest.col(0).innerStride(), // NOTE if dest is not a vector at compile-time, then dest.innerStride() might + // be wrong. (bug 1166) + actualAlpha); + } +}; + +template <> +struct gemv_dense_selector { + template + static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) { + EIGEN_STATIC_ASSERT((!nested_eval::Evaluate), + EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE); + // TODO if rhs is large enough it might be beneficial to make sure that dest is sequentially stored in memory, + // otherwise use a temp + typename nested_eval::type actual_rhs(rhs); + const Index size = rhs.rows(); + for (Index k = 0; k < size; ++k) dest += (alpha * actual_rhs.coeff(k)) * lhs.col(k); + } +}; + +template <> +struct gemv_dense_selector { + template + static void run(const Lhs& lhs, const Rhs& rhs, Dest& dest, const typename Dest::Scalar& alpha) { + EIGEN_STATIC_ASSERT((!nested_eval::Evaluate), + EIGEN_INTERNAL_COMPILATION_ERROR_OR_YOU_MADE_A_PROGRAMMING_MISTAKE); + typename nested_eval::type actual_rhs(rhs); + const Index rows = dest.rows(); + for (Index i = 0; i < rows; ++i) + 
dest.coeffRef(i) += alpha * (lhs.row(i).cwiseProduct(actual_rhs.transpose())).sum(); + } +}; + +} // end namespace internal + +/*************************************************************************** + * Implementation of matrix base methods + ***************************************************************************/ + +/** \returns the matrix product of \c *this and \a other. + * + * \note If instead of the matrix product you want the coefficient-wise product, see Cwise::operator*(). + * + * \sa lazyProduct(), operator*=(const MatrixBase&), Cwise::operator*() + */ +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Product MatrixBase::operator*( + const MatrixBase& other) const { + // A note regarding the function declaration: In MSVC, this function will sometimes + // not be inlined since DenseStorage is an unwindable object for dynamic + // matrices and product types are holding a member to store the result. + // Thus it does not help tagging this function with EIGEN_STRONG_INLINE. 
+ enum { + ProductIsValid = Derived::ColsAtCompileTime == Dynamic || OtherDerived::RowsAtCompileTime == Dynamic || + int(Derived::ColsAtCompileTime) == int(OtherDerived::RowsAtCompileTime), + AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime, + SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived, OtherDerived) + }; + // note to the lost user: + // * for a dot product use: v1.dot(v2) + // * for a coeff-wise product use: v1.cwiseProduct(v2) + EIGEN_STATIC_ASSERT( + ProductIsValid || !(AreVectors && SameSizes), + INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) + EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), + INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) + EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) +#ifdef EIGEN_DEBUG_PRODUCT + internal::product_type::debug(); +#endif + + return Product(derived(), other.derived()); +} + +/** \returns an expression of the matrix product of \c *this and \a other without implicit evaluation. + * + * The returned product will behave like any other expressions: the coefficients of the product will be + * computed once at a time as requested. This might be useful in some extremely rare cases when only + * a small and no coherent fraction of the result's coefficients have to be computed. + * + * \warning This version of the matrix product can be much much slower. So use it only if you know + * what you are doing and that you measured a true speed improvement. 
+ * + * \sa operator*(const MatrixBase&) + */ +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Product +MatrixBase::lazyProduct(const MatrixBase& other) const { + enum { + ProductIsValid = Derived::ColsAtCompileTime == Dynamic || OtherDerived::RowsAtCompileTime == Dynamic || + int(Derived::ColsAtCompileTime) == int(OtherDerived::RowsAtCompileTime), + AreVectors = Derived::IsVectorAtCompileTime && OtherDerived::IsVectorAtCompileTime, + SameSizes = EIGEN_PREDICATE_SAME_MATRIX_SIZE(Derived, OtherDerived) + }; + // note to the lost user: + // * for a dot product use: v1.dot(v2) + // * for a coeff-wise product use: v1.cwiseProduct(v2) + EIGEN_STATIC_ASSERT( + ProductIsValid || !(AreVectors && SameSizes), + INVALID_VECTOR_VECTOR_PRODUCT__IF_YOU_WANTED_A_DOT_OR_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTIONS) + EIGEN_STATIC_ASSERT(ProductIsValid || !(SameSizes && !AreVectors), + INVALID_MATRIX_PRODUCT__IF_YOU_WANTED_A_COEFF_WISE_PRODUCT_YOU_MUST_USE_THE_EXPLICIT_FUNCTION) + EIGEN_STATIC_ASSERT(ProductIsValid || SameSizes, INVALID_MATRIX_PRODUCT) + + return Product(derived(), other.derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/GenericPacketMath.h b/o-voxel/third_party/eigen/Eigen/src/Core/GenericPacketMath.h new file mode 100644 index 0000000000000000000000000000000000000000..5794c65ddc7ef90ca09cef6e28e04ed7b4f9c767 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/GenericPacketMath.h @@ -0,0 +1,1708 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_GENERIC_PACKET_MATH_H +#define EIGEN_GENERIC_PACKET_MATH_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +/** \internal + * \file GenericPacketMath.h + * + * Default implementation for types not supported by the vectorization. + * In practice these functions are provided to make easier the writing + * of generic vectorized code. + */ + +#ifndef EIGEN_DEBUG_ALIGNED_LOAD +#define EIGEN_DEBUG_ALIGNED_LOAD +#endif + +#ifndef EIGEN_DEBUG_UNALIGNED_LOAD +#define EIGEN_DEBUG_UNALIGNED_LOAD +#endif + +#ifndef EIGEN_DEBUG_ALIGNED_STORE +#define EIGEN_DEBUG_ALIGNED_STORE +#endif + +#ifndef EIGEN_DEBUG_UNALIGNED_STORE +#define EIGEN_DEBUG_UNALIGNED_STORE +#endif + +struct default_packet_traits { + enum { + // Ops that are implemented for most types. + HasAdd = 1, + HasSub = 1, + HasShift = 1, + HasMul = 1, + HasNegate = 1, + HasAbs = 1, + HasAbs2 = 1, + HasMin = 1, + HasMax = 1, + HasConj = 1, + HasSetLinear = 1, + HasSign = 1, + HasAbsDiff = 1, + // By default, the nearest integer functions (rint, round, floor, ceil, trunc) are enabled for all scalar and packet + // types + HasRound = 1, + + HasArg = 0, + // This flag is used to indicate whether packet comparison is supported. + // pcmp_eq and pcmp_lt should be defined for it to be true. 
+ HasCmp = 0, + + HasDiv = 0, + HasReciprocal = 0, + HasSqrt = 0, + HasRsqrt = 0, + HasCbrt = 0, + HasExp = 0, + HasExpm1 = 0, + HasLog = 0, + HasLog1p = 0, + HasLog10 = 0, + HasPow = 0, + HasSin = 0, + HasCos = 0, + HasTan = 0, + HasASin = 0, + HasACos = 0, + HasATan = 0, + HasATanh = 0, + HasSinh = 0, + HasCosh = 0, + HasTanh = 0, + HasLGamma = 0, + HasDiGamma = 0, + HasZeta = 0, + HasPolygamma = 0, + HasErf = 0, + HasErfc = 0, + HasNdtri = 0, + HasBessel = 0, + HasIGamma = 0, + HasIGammaDerA = 0, + HasGammaSampleDerAlpha = 0, + HasIGammac = 0, + HasBetaInc = 0 + }; +}; + +template +struct packet_traits : default_packet_traits { + typedef T type; + typedef T half; + enum { + Vectorizable = 0, + size = 1, + AlignedOnScalar = 0, + }; + enum { + HasAdd = 0, + HasSub = 0, + HasAbsDiff = 0, + HasMul = 0, + HasNegate = 0, + HasAbs = 0, + HasAbs2 = 0, + HasMin = 0, + HasMax = 0, + HasConj = 0, + HasSetLinear = 0 + }; +}; + +template +struct packet_traits : packet_traits {}; + +struct default_unpacket_traits { + enum { vectorizable = false, masked_load_available = false, masked_store_available = false }; +}; + +template +struct unpacket_traits : default_unpacket_traits { + typedef T type; + typedef T half; + typedef typename numext::get_integer_by_size::signed_type integer_packet; + enum { + size = 1, + alignment = alignof(T), + }; +}; + +template +struct unpacket_traits : unpacket_traits {}; + +/** \internal A convenience utility for determining if the type is a scalar. + * This is used to enable some generic packet implementations. + */ +template +struct is_scalar { + using Scalar = typename unpacket_traits::type; + enum { value = internal::is_same::value }; +}; + +// automatically and succinctly define combinations of pcast when +// 1) the packets are the same type, or +// 2) the packets differ only in sign. 
+// In both of these cases, preinterpret (bit_cast) is equivalent to pcast (static_cast) +template ::value && is_scalar::value> +struct is_degenerate_helper : is_same {}; +template <> +struct is_degenerate_helper : std::true_type {}; +template <> +struct is_degenerate_helper : std::true_type {}; +template <> +struct is_degenerate_helper : std::true_type {}; +template <> +struct is_degenerate_helper : std::true_type {}; + +template +struct is_degenerate_helper { + using SrcScalar = typename unpacket_traits::type; + static constexpr int SrcSize = unpacket_traits::size; + using TgtScalar = typename unpacket_traits::type; + static constexpr int TgtSize = unpacket_traits::size; + static constexpr bool value = is_degenerate_helper::value && (SrcSize == TgtSize); +}; + +// is_degenerate::value == is_degenerate::value +template +struct is_degenerate { + static constexpr bool value = + is_degenerate_helper::value || is_degenerate_helper::value; +}; + +template +struct is_half { + using Scalar = typename unpacket_traits::type; + static constexpr int Size = unpacket_traits::size; + using DefaultPacket = typename packet_traits::type; + static constexpr int DefaultSize = unpacket_traits::size; + static constexpr bool value = Size != 1 && Size < DefaultSize; +}; + +template +struct type_casting_traits { + enum { + VectorizedCast = + is_degenerate::value && packet_traits::Vectorizable && packet_traits::Vectorizable, + SrcCoeffRatio = 1, + TgtCoeffRatio = 1 + }; +}; + +// provides a succinct template to define vectorized casting traits with respect to the largest accessible packet types +template +struct vectorized_type_casting_traits { + enum : int { + DefaultSrcPacketSize = packet_traits::size, + DefaultTgtPacketSize = packet_traits::size, + VectorizedCast = 1, + SrcCoeffRatio = plain_enum_max(DefaultTgtPacketSize / DefaultSrcPacketSize, 1), + TgtCoeffRatio = plain_enum_max(DefaultSrcPacketSize / DefaultTgtPacketSize, 1) + }; +}; + +/** \internal Wrapper to ensure that multiple 
packet types can map to the same + same underlying vector type. */ +template +struct eigen_packet_wrapper { + EIGEN_ALWAYS_INLINE operator T&() { return m_val; } + EIGEN_ALWAYS_INLINE operator const T&() const { return m_val; } + EIGEN_ALWAYS_INLINE eigen_packet_wrapper() = default; + EIGEN_ALWAYS_INLINE eigen_packet_wrapper(const T& v) : m_val(v) {} + EIGEN_ALWAYS_INLINE eigen_packet_wrapper& operator=(const T& v) { + m_val = v; + return *this; + } + + T m_val; +}; + +template ::value> +struct preinterpret_generic; + +template +struct preinterpret_generic { + // the packets are not the same, attempt scalar bit_cast + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Target run(const Packet& a) { + return numext::bit_cast(a); + } +}; + +template +struct preinterpret_generic { + // the packets are the same type: do nothing + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; } +}; + +template +struct preinterpret_generic::as_real, ComplexPacket, false> { + using RealPacket = typename unpacket_traits::as_real; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealPacket run(const ComplexPacket& a) { return a.v; } +}; + +/** \internal \returns reinterpret_cast(a) */ +template +EIGEN_DEVICE_FUNC inline Target preinterpret(const Packet& a) { + return preinterpret_generic::run(a); +} + +template ::value, + bool TgtIsHalf = is_half::value> +struct pcast_generic; + +template +struct pcast_generic { + // the packets are not degenerate: attempt scalar static_cast + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) { + return cast_impl::run(a); + } +}; + +template +struct pcast_generic { + // the packets are the same: do nothing + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet& a) { return a; } +}; + +template +struct pcast_generic { + // the packets are degenerate: preinterpret is equivalent to pcast + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) { return 
preinterpret(a); } +}; + +/** \internal \returns static_cast(a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a) { + return pcast_generic::run(a); +} +template +EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b) { + return pcast_generic::run(a, b); +} +template +EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c, + const SrcPacket& d) { + return pcast_generic::run(a, b, c, d); +} +template +EIGEN_DEVICE_FUNC inline TgtPacket pcast(const SrcPacket& a, const SrcPacket& b, const SrcPacket& c, const SrcPacket& d, + const SrcPacket& e, const SrcPacket& f, const SrcPacket& g, + const SrcPacket& h) { + return pcast_generic::run(a, b, c, d, e, f, g, h); +} + +template +struct pcast_generic { + // TgtPacket is a half packet of some other type + // perform cast and truncate result + using DefaultTgtPacket = typename is_half::DefaultPacket; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TgtPacket run(const SrcPacket& a) { + return preinterpret(pcast(a)); + } +}; + +/** \internal \returns a + b (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline Packet padd(const Packet& a, const Packet& b) { + return a + b; +} +// Avoid compiler warning for boolean algebra. +template <> +EIGEN_DEVICE_FUNC inline bool padd(const bool& a, const bool& b) { + return a || b; +} + +/** \internal \returns a packet version of \a *from, (un-aligned masked add) + * There is no generic implementation. We only have implementations for specialized + * cases. Generic case should not be called. 
+ */ +template +EIGEN_DEVICE_FUNC inline std::enable_if_t::masked_fpops_available, Packet> padd( + const Packet& a, const Packet& b, typename unpacket_traits::mask_t umask); + +/** \internal \returns a - b (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline Packet psub(const Packet& a, const Packet& b) { + return a - b; +} + +/** \internal \returns -a (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline Packet pnegate(const Packet& a) { + EIGEN_STATIC_ASSERT((!is_same::type, bool>::value), + NEGATE IS NOT DEFINED FOR BOOLEAN TYPES) + return numext::negate(a); +} + +/** \internal \returns conj(a) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline Packet pconj(const Packet& a) { + return numext::conj(a); +} + +/** \internal \returns a * b (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline Packet pmul(const Packet& a, const Packet& b) { + return a * b; +} +// Avoid compiler warning for boolean algebra. +template <> +EIGEN_DEVICE_FUNC inline bool pmul(const bool& a, const bool& b) { + return a && b; +} + +/** \internal \returns a / b (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline Packet pdiv(const Packet& a, const Packet& b) { + return a / b; +} +// Avoid compiler warning for boolean algebra. +template <> +EIGEN_DEVICE_FUNC inline bool pdiv(const bool& a, const bool& b) { + return a && b; +} + +// In the generic packet case, memset to all one bits. +template +struct ptrue_impl { + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) { + Packet b; + memset(static_cast(&b), 0xff, sizeof(Packet)); + return b; + } +}; + +// Use a value of one for scalars. +template +struct ptrue_impl::value>> { + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar&) { return Scalar(1); } +}; + +// For booleans, we can only directly set a valid `bool` value to avoid UB. +template <> +struct ptrue_impl { + static EIGEN_DEVICE_FUNC inline bool run(const bool&) { return true; } +}; + +/** \internal \returns one bits. 
*/ +template +EIGEN_DEVICE_FUNC inline Packet ptrue(const Packet& a) { + return ptrue_impl::run(a); +} + +// In the general packet case, memset to zero. +template +struct pzero_impl { + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& /*a*/) { + Packet b; + memset(static_cast(&b), 0x00, sizeof(Packet)); + return b; + } +}; + +// For scalars, explicitly set to Scalar(0), since the underlying representation +// for zero may not consist of all-zero bits. +template +struct pzero_impl::value>> { + static EIGEN_DEVICE_FUNC inline T run(const T& /*a*/) { return T(0); } +}; + +/** \internal \returns packet of zeros */ +template +EIGEN_DEVICE_FUNC inline Packet pzero(const Packet& a) { + return pzero_impl::run(a); +} + +template +struct bit_and { + EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a & b; } +}; + +template +struct bit_or { + EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a | b; } +}; + +template +struct bit_xor { + EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a, const T& b) const { return a ^ b; } +}; + +template +struct bit_not { + EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE T operator()(const T& a) const { return ~a; } +}; + +template <> +struct bit_and { + EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a && b; } +}; + +template <> +struct bit_or { + EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a || b; } +}; + +template <> +struct bit_xor { + EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a, const bool& b) const { return a != b; } +}; + +template <> +struct bit_not { + EIGEN_DEVICE_FUNC constexpr EIGEN_ALWAYS_INLINE bool operator()(const bool& a) const { return !a; } +}; + +// Use operators &, |, ^, ~. 
+template +struct operator_bitwise_helper { + EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { return bit_and()(a, b); } + EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return bit_or()(a, b); } + EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { return bit_xor()(a, b); } + EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return bit_not()(a); } +}; + +// Apply binary operations byte-by-byte +template +struct bytewise_bitwise_helper { + EIGEN_DEVICE_FUNC static inline T bitwise_and(const T& a, const T& b) { + return binary(a, b, bit_and()); + } + EIGEN_DEVICE_FUNC static inline T bitwise_or(const T& a, const T& b) { return binary(a, b, bit_or()); } + EIGEN_DEVICE_FUNC static inline T bitwise_xor(const T& a, const T& b) { + return binary(a, b, bit_xor()); + } + EIGEN_DEVICE_FUNC static inline T bitwise_not(const T& a) { return unary(a, bit_not()); } + + private: + template + EIGEN_DEVICE_FUNC static inline T unary(const T& a, Op op) { + const unsigned char* a_ptr = reinterpret_cast(&a); + T c; + unsigned char* c_ptr = reinterpret_cast(&c); + for (size_t i = 0; i < sizeof(T); ++i) { + *c_ptr++ = op(*a_ptr++); + } + return c; + } + + template + EIGEN_DEVICE_FUNC static inline T binary(const T& a, const T& b, Op op) { + const unsigned char* a_ptr = reinterpret_cast(&a); + const unsigned char* b_ptr = reinterpret_cast(&b); + T c; + unsigned char* c_ptr = reinterpret_cast(&c); + for (size_t i = 0; i < sizeof(T); ++i) { + *c_ptr++ = op(*a_ptr++, *b_ptr++); + } + return c; + } +}; + +// In the general case, use byte-by-byte manipulation. +template +struct bitwise_helper : public bytewise_bitwise_helper {}; + +// For integers or non-trivial scalars, use binary operators. 
+template +struct bitwise_helper::value && + (NumTraits::IsInteger || NumTraits::RequireInitialization)>> + : public operator_bitwise_helper {}; + +/** \internal \returns the bitwise and of \a a and \a b */ +template +EIGEN_DEVICE_FUNC inline Packet pand(const Packet& a, const Packet& b) { + return bitwise_helper::bitwise_and(a, b); +} + +/** \internal \returns the bitwise or of \a a and \a b */ +template +EIGEN_DEVICE_FUNC inline Packet por(const Packet& a, const Packet& b) { + return bitwise_helper::bitwise_or(a, b); +} + +/** \internal \returns the bitwise xor of \a a and \a b */ +template +EIGEN_DEVICE_FUNC inline Packet pxor(const Packet& a, const Packet& b) { + return bitwise_helper::bitwise_xor(a, b); +} + +/** \internal \returns the bitwise not of \a a */ +template +EIGEN_DEVICE_FUNC inline Packet pnot(const Packet& a) { + return bitwise_helper::bitwise_not(a); +} + +/** \internal \returns the bitwise and of \a a and not \a b */ +template +EIGEN_DEVICE_FUNC inline Packet pandnot(const Packet& a, const Packet& b) { + return pand(a, pnot(b)); +} + +/** \internal \returns a < b as a bit mask */ +template +EIGEN_DEVICE_FUNC inline Packet pcmp_lt(const Packet& a, const Packet& b) { + return a < b ? ptrue(a) : pzero(a); +} + +/** \internal \returns a == b as a bit mask */ +template +EIGEN_DEVICE_FUNC inline Packet pcmp_eq(const Packet& a, const Packet& b) { + return a == b ? ptrue(a) : pzero(a); +} + +/** \internal \returns a <= b as a bit mask */ +template +EIGEN_DEVICE_FUNC inline Packet pcmp_le(const Packet& a, const Packet& b) { + return por(pcmp_eq(a, b), pcmp_lt(a, b)); +} + +/** \internal \returns a < b or a==NaN or b==NaN as a bit mask */ +template +EIGEN_DEVICE_FUNC inline Packet pcmp_lt_or_nan(const Packet& a, const Packet& b) { + return a >= b ? pzero(a) : ptrue(a); +} + +// In the general case, use bitwise select. 
+template ::value> +struct pselect_impl { + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) { + return por(pand(a, mask), pandnot(b, mask)); + } +}; + +// For scalars, use ternary select. +template +struct pselect_impl { + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& mask, const Packet& a, const Packet& b) { + return numext::select(mask, a, b); + } +}; + +/** \internal \returns \a or \b for each field in packet according to \mask */ +template +EIGEN_DEVICE_FUNC inline Packet pselect(const Packet& mask, const Packet& a, const Packet& b) { + return pselect_impl::run(mask, a, b); +} + +template <> +EIGEN_DEVICE_FUNC inline bool pselect(const bool& cond, const bool& a, const bool& b) { + return cond ? a : b; +} + +/** \internal \returns the min or of \a a and \a b (coeff-wise) + If either \a a or \a b are NaN, the result is implementation defined. */ +template +struct pminmax_impl { + template + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) { + return op(a, b); + } +}; + +/** \internal \returns the min or max of \a a and \a b (coeff-wise) + If either \a a or \a b are NaN, NaN is returned. */ +template <> +struct pminmax_impl { + template + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) { + Packet not_nan_mask_a = pcmp_eq(a, a); + Packet not_nan_mask_b = pcmp_eq(b, b); + return pselect(not_nan_mask_a, pselect(not_nan_mask_b, op(a, b), b), a); + } +}; + +/** \internal \returns the min or max of \a a and \a b (coeff-wise) + If both \a a and \a b are NaN, NaN is returned. + Equivalent to std::fmin(a, b). 
*/ +template <> +struct pminmax_impl { + template + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a, const Packet& b, Op op) { + Packet not_nan_mask_a = pcmp_eq(a, a); + Packet not_nan_mask_b = pcmp_eq(b, b); + return pselect(not_nan_mask_a, pselect(not_nan_mask_b, op(a, b), a), b); + } +}; + +#define EIGEN_BINARY_OP_NAN_PROPAGATION(Type, Func) [](const Type& aa, const Type& bb) { return Func(aa, bb); } + +/** \internal \returns the min of \a a and \a b (coeff-wise). + If \a a or \b b is NaN, the return value is implementation defined. */ +template +EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) { + return numext::mini(a, b); +} + +/** \internal \returns the min of \a a and \a b (coeff-wise). + NaNPropagation determines the NaN propagation semantics. */ +template +EIGEN_DEVICE_FUNC inline Packet pmin(const Packet& a, const Packet& b) { + constexpr bool IsInteger = NumTraits::type>::IsInteger; + return pminmax_impl::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmin))); +} + +/** \internal \returns the max of \a a and \a b (coeff-wise) + If \a a or \b b is NaN, the return value is implementation defined. */ +template +EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) { + return numext::maxi(a, b); +} + +/** \internal \returns the max of \a a and \a b (coeff-wise). + NaNPropagation determines the NaN propagation semantics. 
*/ +template +EIGEN_DEVICE_FUNC inline Packet pmax(const Packet& a, const Packet& b) { + constexpr bool IsInteger = NumTraits::type>::IsInteger; + return pminmax_impl::run(a, b, EIGEN_BINARY_OP_NAN_PROPAGATION(Packet, (pmax))); +} + +/** \internal \returns the absolute value of \a a */ +template +EIGEN_DEVICE_FUNC inline Packet pabs(const Packet& a) { + return numext::abs(a); +} +template <> +EIGEN_DEVICE_FUNC inline unsigned int pabs(const unsigned int& a) { + return a; +} +template <> +EIGEN_DEVICE_FUNC inline unsigned long pabs(const unsigned long& a) { + return a; +} +template <> +EIGEN_DEVICE_FUNC inline unsigned long long pabs(const unsigned long long& a) { + return a; +} + +/** \internal \returns the addsub value of \a a,b */ +template +EIGEN_DEVICE_FUNC inline Packet paddsub(const Packet& a, const Packet& b) { + return pselect(peven_mask(a), padd(a, b), psub(a, b)); +} + +/** \internal \returns the phase angle of \a a */ +template +EIGEN_DEVICE_FUNC inline Packet parg(const Packet& a) { + using numext::arg; + return arg(a); +} + +/** \internal \returns \a a arithmetically shifted by N bits to the right */ +template +EIGEN_DEVICE_FUNC inline T parithmetic_shift_right(const T& a) { + return numext::arithmetic_shift_right(a, N); +} + +/** \internal \returns \a a logically shifted by N bits to the right */ +template +EIGEN_DEVICE_FUNC inline T plogical_shift_right(const T& a) { + return numext::logical_shift_right(a, N); +} + +/** \internal \returns \a a shifted by N bits to the left */ +template +EIGEN_DEVICE_FUNC inline T plogical_shift_left(const T& a) { + return numext::logical_shift_left(a, N); +} + +/** \internal \returns the significant and exponent of the underlying floating point numbers + * See https://en.cppreference.com/w/cpp/numeric/math/frexp + */ +template +EIGEN_DEVICE_FUNC inline Packet pfrexp(const Packet& a, Packet& exponent) { + int exp; + EIGEN_USING_STD(frexp); + Packet result = static_cast(frexp(a, &exp)); + exponent = static_cast(exp); + 
return result; +} + +/** \internal \returns a * 2^((int)exponent) + * See https://en.cppreference.com/w/cpp/numeric/math/ldexp + */ +template +EIGEN_DEVICE_FUNC inline Packet pldexp(const Packet& a, const Packet& exponent) { + EIGEN_USING_STD(ldexp) + return static_cast(ldexp(a, static_cast(exponent))); +} + +/** \internal \returns the min of \a a and \a b (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline std::enable_if_t::type>::IsInteger, Packet> +pabsdiff(const Packet& a, const Packet& b) { + return pselect(pcmp_lt(a, b), psub(b, a), psub(a, b)); +} +template +EIGEN_DEVICE_FUNC inline std::enable_if_t::type>::IsInteger, Packet> +pabsdiff(const Packet& a, const Packet& b) { + return pabs(psub(a, b)); +} + +/** \internal \returns a packet version of \a *from, from must be properly aligned */ +template +EIGEN_DEVICE_FUNC inline Packet pload(const typename unpacket_traits::type* from) { + return *from; +} + +/** \internal \returns n elements of a packet version of \a *from, from must be properly aligned + * offset indicates the starting element in which to load and + * offset + n <= unpacket_traits::size + * All elements before offset and after the last element loaded will initialized with zero */ +template +EIGEN_DEVICE_FUNC inline Packet pload_partial(const typename unpacket_traits::type* from, const Index n, + const Index offset = 0) { + const Index packet_size = unpacket_traits::size; + eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet"); + typedef typename unpacket_traits::type Scalar; + EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)}; + for (Index i = offset; i < numext::mini(n + offset, packet_size); i++) { + elements[i] = from[i - offset]; + } + return pload(elements); +} + +/** \internal \returns a packet version of \a *from, (un-aligned load) */ +template +EIGEN_DEVICE_FUNC inline Packet ploadu(const typename unpacket_traits::type* from) { + return *from; +} + +/** \internal \returns n 
elements of a packet version of \a *from, (un-aligned load) + * All elements after the last element loaded will initialized with zero */ +template +EIGEN_DEVICE_FUNC inline Packet ploadu_partial(const typename unpacket_traits::type* from, const Index n, + const Index offset = 0) { + const Index packet_size = unpacket_traits::size; + eigen_assert(n + offset <= packet_size && "number of elements plus offset will read past end of packet"); + typedef typename unpacket_traits::type Scalar; + EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)}; + for (Index i = offset; i < numext::mini(n + offset, packet_size); i++) { + elements[i] = from[i - offset]; + } + return pload(elements); +} + +/** \internal \returns a packet version of \a *from, (un-aligned masked load) + * There is no generic implementation. We only have implementations for specialized + * cases. Generic case should not be called. + */ +template +EIGEN_DEVICE_FUNC inline std::enable_if_t::masked_load_available, Packet> ploadu( + const typename unpacket_traits::type* from, typename unpacket_traits::mask_t umask); + +/** \internal \returns a packet with constant coefficients \a a, e.g.: (a,a,a,a) */ +template +EIGEN_DEVICE_FUNC inline Packet pset1(const typename unpacket_traits::type& a) { + return a; +} + +/** \internal \returns a packet with constant coefficients set from bits */ +template +EIGEN_DEVICE_FUNC inline Packet pset1frombits(BitsType a); + +/** \internal \returns a packet with constant coefficients \a a[0], e.g.: (a[0],a[0],a[0],a[0]) */ +template +EIGEN_DEVICE_FUNC inline Packet pload1(const typename unpacket_traits::type* a) { + return pset1(*a); +} + +/** \internal \returns a packet with elements of \a *from duplicated. + * For instance, for a packet of 8 elements, 4 scalars will be read from \a *from and + * duplicated to form: {from[0],from[0],from[1],from[1],from[2],from[2],from[3],from[3]} + * Currently, this function is only used for scalar * complex products. 
+ */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ploaddup(const typename unpacket_traits::type* from) { + return *from; +} + +/** \internal \returns a packet with elements of \a *from quadrupled. + * For instance, for a packet of 8 elements, 2 scalars will be read from \a *from and + * replicated to form: {from[0],from[0],from[0],from[0],from[1],from[1],from[1],from[1]} + * Currently, this function is only used in matrix products. + * For packet-size smaller or equal to 4, this function is equivalent to pload1 + */ +template +EIGEN_DEVICE_FUNC inline Packet ploadquad(const typename unpacket_traits::type* from) { + return pload1(from); +} + +/** \internal equivalent to + * \code + * a0 = pload1(a+0); + * a1 = pload1(a+1); + * a2 = pload1(a+2); + * a3 = pload1(a+3); + * \endcode + * \sa pset1, pload1, ploaddup, pbroadcast2 + */ +template +EIGEN_DEVICE_FUNC inline void pbroadcast4(const typename unpacket_traits::type* a, Packet& a0, Packet& a1, + Packet& a2, Packet& a3) { + a0 = pload1(a + 0); + a1 = pload1(a + 1); + a2 = pload1(a + 2); + a3 = pload1(a + 3); +} + +/** \internal equivalent to + * \code + * a0 = pload1(a+0); + * a1 = pload1(a+1); + * \endcode + * \sa pset1, pload1, ploaddup, pbroadcast4 + */ +template +EIGEN_DEVICE_FUNC inline void pbroadcast2(const typename unpacket_traits::type* a, Packet& a0, Packet& a1) { + a0 = pload1(a + 0); + a1 = pload1(a + 1); +} + +/** \internal \brief Returns a packet with coefficients (a,a+1,...,a+packet_size-1). */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet plset(const typename unpacket_traits::type& a) { + return a; +} + +template +struct peven_mask_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run(const Packet&) { + typedef typename unpacket_traits::type Scalar; + const size_t n = unpacket_traits::size; + EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n]; + for (size_t i = 0; i < n; ++i) { + memset(elements + i, ((i & 1) == 0 ? 
0xff : 0), sizeof(Scalar)); + } + return ploadu(elements); + } +}; + +template +struct peven_mask_impl::value>> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar&) { return Scalar(1); } +}; + +/** \internal \returns a packet with constant coefficients \a a, e.g.: (x, 0, x, 0), + where x is the value of all 1-bits. */ +template +EIGEN_DEVICE_FUNC inline Packet peven_mask(const Packet& a) { + return peven_mask_impl::run(a); +} + +/** \internal copy the packet \a from to \a *to, \a to must be properly aligned */ +template +EIGEN_DEVICE_FUNC inline void pstore(Scalar* to, const Packet& from) { + (*to) = from; +} + +/** \internal copy n elements of the packet \a from to \a *to, \a to must be properly aligned + * offset indicates the starting element in which to store and + * offset + n <= unpacket_traits::size */ +template +EIGEN_DEVICE_FUNC inline void pstore_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) { + const Index packet_size = unpacket_traits::size; + eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet"); + EIGEN_ALIGN_MAX Scalar elements[packet_size]; + pstore(elements, from); + for (Index i = 0; i < numext::mini(n, packet_size - offset); i++) { + to[i] = elements[i + offset]; + } +} + +/** \internal copy the packet \a from to \a *to, (un-aligned store) */ +template +EIGEN_DEVICE_FUNC inline void pstoreu(Scalar* to, const Packet& from) { + (*to) = from; +} + +/** \internal copy n elements of the packet \a from to \a *to, (un-aligned store) */ +template +EIGEN_DEVICE_FUNC inline void pstoreu_partial(Scalar* to, const Packet& from, const Index n, const Index offset = 0) { + const Index packet_size = unpacket_traits::size; + eigen_assert(n + offset <= packet_size && "number of elements plus offset will write past end of packet"); + EIGEN_ALIGN_MAX Scalar elements[packet_size]; + pstore(elements, from); + for (Index i = 0; i < numext::mini(n, packet_size - 
offset); i++) { + to[i] = elements[i + offset]; + } +} + +/** \internal copy the packet \a from to \a *to, (un-aligned store with a mask) + * There is no generic implementation. We only have implementations for specialized + * cases. Generic case should not be called. + */ +template +EIGEN_DEVICE_FUNC inline std::enable_if_t::masked_store_available, void> pstoreu( + Scalar* to, const Packet& from, typename unpacket_traits::mask_t umask); + +template +EIGEN_DEVICE_FUNC inline Packet pgather(const Scalar* from, Index /*stride*/) { + return ploadu(from); +} + +template +EIGEN_DEVICE_FUNC inline Packet pgather_partial(const Scalar* from, Index stride, const Index n) { + const Index packet_size = unpacket_traits::size; + EIGEN_ALIGN_MAX Scalar elements[packet_size] = {Scalar(0)}; + for (Index i = 0; i < numext::mini(n, packet_size); i++) { + elements[i] = from[i * stride]; + } + return pload(elements); +} + +template +EIGEN_DEVICE_FUNC inline void pscatter(Scalar* to, const Packet& from, Index /*stride*/) { + pstore(to, from); +} + +template +EIGEN_DEVICE_FUNC inline void pscatter_partial(Scalar* to, const Packet& from, Index stride, const Index n) { + const Index packet_size = unpacket_traits::size; + EIGEN_ALIGN_MAX Scalar elements[packet_size]; + pstore(elements, from); + for (Index i = 0; i < numext::mini(n, packet_size); i++) { + to[i * stride] = elements[i]; + } +} + +/** \internal tries to do cache prefetching of \a addr */ +template +EIGEN_DEVICE_FUNC inline void prefetch(const Scalar* addr) { +#if defined(EIGEN_HIP_DEVICE_COMPILE) + // do nothing +#elif defined(EIGEN_CUDA_ARCH) +#if defined(__LP64__) || EIGEN_OS_WIN64 + // 64-bit pointer operand constraint for inlined asm + asm(" prefetch.L1 [ %1 ];" : "=l"(addr) : "l"(addr)); +#else + // 32-bit pointer operand constraint for inlined asm + asm(" prefetch.L1 [ %1 ];" : "=r"(addr) : "r"(addr)); +#endif +#elif (!EIGEN_COMP_MSVC) && (EIGEN_COMP_GNUC || EIGEN_COMP_CLANG || EIGEN_COMP_ICC) + __builtin_prefetch(addr); 
+#endif +} + +/** \internal \returns the reversed elements of \a a*/ +template +EIGEN_DEVICE_FUNC inline Packet preverse(const Packet& a) { + return a; +} + +/** \internal \returns \a a with real and imaginary parts flipped (for complex types only) */ +template +EIGEN_DEVICE_FUNC inline Packet pcplxflip(const Packet& a) { + return Packet(numext::imag(a), numext::real(a)); +} + +/** \internal \returns \a a with real part duplicated (for complex types only) */ +// TODO(rmlarsen): Define and use in all complex backends. +template +EIGEN_DEVICE_FUNC inline Packet pdupreal(const Packet& a) { + return Packet(numext::real(a), numext::real(a)); +} + +/** \internal \returns \a a with imaginary part duplicated (for complex types only) */ +// TODO(rmlarsen): Define and use in all complex backends. +template +EIGEN_DEVICE_FUNC inline Packet pdupimag(const Packet& a) { + return Packet(numext::imag(a), numext::imag(a)); +} + +/************************** + * Special math functions + ***************************/ + +/** \internal \returns isnan(a) */ +template +EIGEN_DEVICE_FUNC inline Packet pisnan(const Packet& a) { + return pandnot(ptrue(a), pcmp_eq(a, a)); +} + +/** \internal \returns isinf(a) */ +template +EIGEN_DEVICE_FUNC inline Packet pisinf(const Packet& a) { + using Scalar = typename unpacket_traits::type; + constexpr Scalar inf = NumTraits::infinity(); + return pcmp_eq(pabs(a), pset1(inf)); +} + +/** \internal \returns the sine of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psin(const Packet& a) { + EIGEN_USING_STD(sin); + return sin(a); +} + +/** \internal \returns the cosine of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcos(const Packet& a) { + EIGEN_USING_STD(cos); + return cos(a); +} + +/** \internal \returns the tan of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptan(const Packet& a) { + EIGEN_USING_STD(tan); + return 
tan(a); +} + +/** \internal \returns the arc sine of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pasin(const Packet& a) { + EIGEN_USING_STD(asin); + return asin(a); +} + +/** \internal \returns the arc cosine of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pacos(const Packet& a) { + EIGEN_USING_STD(acos); + return acos(a); +} + +/** \internal \returns the hyperbolic sine of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psinh(const Packet& a) { + EIGEN_USING_STD(sinh); + return sinh(a); +} + +/** \internal \returns the hyperbolic cosine of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcosh(const Packet& a) { + EIGEN_USING_STD(cosh); + return cosh(a); +} + +/** \internal \returns the arc tangent of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patan(const Packet& a) { + EIGEN_USING_STD(atan); + return atan(a); +} + +/** \internal \returns the hyperbolic tan of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet ptanh(const Packet& a) { + EIGEN_USING_STD(tanh); + return tanh(a); +} + +/** \internal \returns the arc tangent of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet patanh(const Packet& a) { + EIGEN_USING_STD(atanh); + return atanh(a); +} + +/** \internal \returns the exp of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp(const Packet& a) { + return numext::exp(a); +} + +/** \internal \returns the exp2 of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexp2(const Packet& a) { + return numext::exp2(a); +} + +/** \internal \returns the expm1 of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pexpm1(const Packet& a) 
{ + return numext::expm1(a); +} + +/** \internal \returns the log of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog(const Packet& a) { + EIGEN_USING_STD(log); + return log(a); +} + +/** \internal \returns the log1p of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog1p(const Packet& a) { + return numext::log1p(a); +} + +/** \internal \returns the log10 of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog10(const Packet& a) { + EIGEN_USING_STD(log10); + return log10(a); +} + +/** \internal \returns the log2 of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet plog2(const Packet& a) { + using Scalar = typename internal::unpacket_traits::type; + using RealScalar = typename NumTraits::Real; + return pmul(pset1(Scalar(RealScalar(EIGEN_LOG2E))), plog(a)); +} + +/** \internal \returns the square-root of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet psqrt(const Packet& a) { + return numext::sqrt(a); +} + +/** \internal \returns the cube-root of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet pcbrt(const Packet& a) { + return numext::cbrt(a); +} + +template ::value, + bool IsInteger = NumTraits::type>::IsInteger> +struct nearest_integer_packetop_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_floor(const Packet& x) { return numext::floor(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_ceil(const Packet& x) { return numext::ceil(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_rint(const Packet& x) { return numext::rint(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_round(const Packet& x) { return numext::round(x); } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet run_trunc(const Packet& x) { return numext::trunc(x); } +}; + +/** 
\internal \returns the rounded value of \a a (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pround(const Packet& a) { + return nearest_integer_packetop_impl::run_round(a); +} + +/** \internal \returns the floor of \a a (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pfloor(const Packet& a) { + return nearest_integer_packetop_impl::run_floor(a); +} + +/** \internal \returns the rounded value of \a a (coeff-wise) with current + * rounding mode */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet print(const Packet& a) { + return nearest_integer_packetop_impl::run_rint(a); +} + +/** \internal \returns the ceil of \a a (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet pceil(const Packet& a) { + return nearest_integer_packetop_impl::run_ceil(a); +} + +/** \internal \returns the truncation of \a a (coeff-wise) */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet ptrunc(const Packet& a) { + return nearest_integer_packetop_impl::run_trunc(a); +} + +template +struct psign_impl { + static EIGEN_DEVICE_FUNC inline Packet run(const Packet& a) { return numext::sign(a); } +}; + +/** \internal \returns the sign of \a a (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline Packet psign(const Packet& a) { + return psign_impl::run(a); +} + +template <> +EIGEN_DEVICE_FUNC inline bool psign(const bool& a) { + return a; +} + +/** \internal \returns the first element of a packet */ +template +EIGEN_DEVICE_FUNC inline typename unpacket_traits::type pfirst(const Packet& a) { + return a; +} + +/** \internal \returns the sum of the elements of upper and lower half of \a a if \a a is larger than 4. + * For a packet {a0, a1, a2, a3, a4, a5, a6, a7}, it returns a half packet {a0+a4, a1+a5, a2+a6, a3+a7} + * For packet-size smaller or equal to 4, this boils down to a noop. 
+ */ +template +EIGEN_DEVICE_FUNC inline std::conditional_t<(unpacket_traits::size % 8) == 0, + typename unpacket_traits::half, Packet> +predux_half(const Packet& a) { + return a; +} + +// Slow generic implementation of Packet reduction. +template +EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_helper(const Packet& a, Op op) { + typedef typename unpacket_traits::type Scalar; + const size_t n = unpacket_traits::size; + EIGEN_ALIGN_TO_BOUNDARY(sizeof(Packet)) Scalar elements[n]; + pstoreu(elements, a); + for (size_t k = n / 2; k > 0; k /= 2) { + for (size_t i = 0; i < k; ++i) { + elements[i] = op(elements[i], elements[i + k]); + } + } + return elements[0]; +} + +/** \internal \returns the sum of the elements of \a a*/ +template +EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux(const Packet& a) { + return a; +} + +/** \internal \returns the product of the elements of \a a */ +template +EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_mul(const Packet& a) { + typedef typename unpacket_traits::type Scalar; + return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmul))); +} + +/** \internal \returns the min of the elements of \a a */ +template +EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_min(const Packet& a) { + typedef typename unpacket_traits::type Scalar; + return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin))); +} + +/** \internal \returns the max of the elements of \a a */ +template +EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_max(const Packet& a) { + typedef typename unpacket_traits::type Scalar; + return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax))); +} + +template +struct predux_min_max_helper_impl { + using Scalar = typename unpacket_traits::type; + static constexpr bool UsePredux_ = NaNPropagation == PropagateFast || NumTraits::IsInteger; + template = true> + static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) { + return 
predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmin))); + } + template = true> + static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) { + return predux_helper(a, EIGEN_BINARY_OP_NAN_PROPAGATION(Scalar, (pmax))); + } + template = true> + static EIGEN_DEVICE_FUNC inline Scalar run_min(const Packet& a) { + return predux_min(a); + } + template = true> + static EIGEN_DEVICE_FUNC inline Scalar run_max(const Packet& a) { + return predux_max(a); + } +}; + +template +EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_min(const Packet& a) { + return predux_min_max_helper_impl::run_min(a); +} + +template +EIGEN_DEVICE_FUNC inline typename unpacket_traits::type predux_max(const Packet& a) { + return predux_min_max_helper_impl::run_max(a); +} + +#undef EIGEN_BINARY_OP_NAN_PROPAGATION + +/** \internal \returns true if all coeffs of \a a means "true" + * It is supposed to be called on values returned by pcmp_*. + */ +// not needed yet +// template EIGEN_DEVICE_FUNC inline bool predux_all(const Packet& a) +// { return bool(a); } + +/** \internal \returns true if any coeffs of \a a means "true" + * It is supposed to be called on values returned by pcmp_*. + */ +template +EIGEN_DEVICE_FUNC inline bool predux_any(const Packet& a) { + // Dirty but generic implementation where "true" is assumed to be non 0 and all the sames. + // It is expected that "true" is either: + // - Scalar(1) + // - bits full of ones (NaN for floats), + // - or first bit equals to 1 (1 for ints, smallest denormal for floats). + // For all these cases, taking the sum is just fine, and this boils down to a no-op for scalars. 
+ typedef typename unpacket_traits::type Scalar; + return numext::not_equal_strict(predux(a), Scalar(0)); +} + +/*************************************************************************** + * The following functions might not have to be overwritten for vectorized types + ***************************************************************************/ + +template +struct pmadd_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmadd(const Packet& a, const Packet& b, const Packet& c) { + return padd(pmul(a, b), c); + } + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pmsub(const Packet& a, const Packet& b, const Packet& c) { + return psub(pmul(a, b), c); + } + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) { + return psub(c, pmul(a, b)); + } + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) { + return pnegate(padd(pmul(a, b), c)); + } +}; + +template +struct pmadd_impl::value && NumTraits::IsSigned>> { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmadd(const Scalar& a, const Scalar& b, const Scalar& c) { + return numext::madd(a, b, c); + } + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pmsub(const Scalar& a, const Scalar& b, const Scalar& c) { + return numext::madd(a, b, Scalar(-c)); + } + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmadd(const Scalar& a, const Scalar& b, const Scalar& c) { + return numext::madd(Scalar(-a), b, c); + } + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar pnmsub(const Scalar& a, const Scalar& b, const Scalar& c) { + return -Scalar(numext::madd(a, b, c)); + } +}; + +// Multiply-add instructions. 
+/** \internal \returns a * b + c (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline Packet pmadd(const Packet& a, const Packet& b, const Packet& c) { + return pmadd_impl::pmadd(a, b, c); +} + +/** \internal \returns a * b - c (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline Packet pmsub(const Packet& a, const Packet& b, const Packet& c) { + return pmadd_impl::pmsub(a, b, c); +} + +/** \internal \returns -(a * b) + c (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline Packet pnmadd(const Packet& a, const Packet& b, const Packet& c) { + return pmadd_impl::pnmadd(a, b, c); +} + +/** \internal \returns -((a * b + c) (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline Packet pnmsub(const Packet& a, const Packet& b, const Packet& c) { + return pmadd_impl::pnmsub(a, b, c); +} + +/** \internal copy a packet with constant coefficient \a a (e.g., [a,a,a,a]) to \a *to. \a to must be 16 bytes aligned + */ +// NOTE: this function must really be templated on the packet type (think about different packet types for the same +// scalar type) +template +inline void pstore1(typename unpacket_traits::type* to, const typename unpacket_traits::type& a) { + pstore(to, pset1(a)); +} + +/** \internal \returns a packet version of \a *from. + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt(const typename unpacket_traits::type* from) { + if (Alignment >= unpacket_traits::alignment) + return pload(from); + else + return ploadu(from); +} + +/** \internal \returns n elements of a packet version of \a *from. + * The pointer \a from must be aligned on a \a Alignment bytes boundary. 
*/ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_partial(const typename unpacket_traits::type* from, + const Index n, const Index offset = 0) { + if (Alignment >= unpacket_traits::alignment) + return pload_partial(from, n, offset); + else + return ploadu_partial(from, n, offset); +} + +/** \internal copy the packet \a from to \a *to. + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret(Scalar* to, const Packet& from) { + if (Alignment >= unpacket_traits::alignment) + pstore(to, from); + else + pstoreu(to, from); +} + +/** \internal copy n elements of the packet \a from to \a *to. + * The pointer \a from must be aligned on a \a Alignment bytes boundary. */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void pstoret_partial(Scalar* to, const Packet& from, const Index n, + const Index offset = 0) { + if (Alignment >= unpacket_traits::alignment) + pstore_partial(to, from, n, offset); + else + pstoreu_partial(to, from, n, offset); +} + +/** \internal \returns a packet version of \a *from. + * Unlike ploadt, ploadt_ro takes advantage of the read-only memory path on the + * hardware if available to speedup the loading of data that won't be modified + * by the current computation. + */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet ploadt_ro(const typename unpacket_traits::type* from) { + return ploadt(from); +} + +/*************************************************************************** + * Fast complex products (GCC generates a function call which is very slow) + ***************************************************************************/ + +// Eigen+CUDA does not support complexes. 
+#if !defined(EIGEN_GPUCC) + +template <> +inline std::complex pmul(const std::complex& a, const std::complex& b) { + return std::complex(a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag()); +} + +template <> +inline std::complex pmul(const std::complex& a, const std::complex& b) { + return std::complex(a.real() * b.real() - a.imag() * b.imag(), a.imag() * b.real() + a.real() * b.imag()); +} + +#endif + +/*************************************************************************** + * PacketBlock, that is a collection of N packets where the number of words + * in the packet is a multiple of N. + ***************************************************************************/ +template ::size> +struct PacketBlock { + Packet packet[N]; +}; + +template +EIGEN_DEVICE_FUNC inline void ptranspose(PacketBlock& /*kernel*/) { + // Nothing to do in the scalar case, i.e. a 1x1 matrix. +} + +/** \internal \returns 1 / a (coeff-wise) */ +template +EIGEN_DEVICE_FUNC inline Packet preciprocal(const Packet& a) { + using Scalar = typename unpacket_traits::type; + return pdiv(pset1(Scalar(1)), a); +} + +/** \internal \returns the reciprocal square-root of \a a (coeff-wise) */ +template +EIGEN_DECLARE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS Packet prsqrt(const Packet& a) { + return preciprocal(psqrt(a)); +} + +template ::value, + bool IsInteger = NumTraits::type>::IsInteger> +struct psignbit_impl; +template +struct psignbit_impl { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return numext::signbit(a); } +}; +template +struct psignbit_impl { + // generic implementation if not specialized in PacketMath.h + // slower than arithmetic shift + typedef typename unpacket_traits::type Scalar; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Packet run(const Packet& a) { + const Packet cst_pos_one = pset1(Scalar(1)); + const Packet cst_neg_one = pset1(Scalar(-1)); + return pcmp_eq(por(pand(a, cst_neg_one), cst_pos_one), 
cst_neg_one); + } +}; +template +struct psignbit_impl { + // generic implementation for integer packets + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Packet run(const Packet& a) { return pcmp_lt(a, pzero(a)); } +}; +/** \internal \returns the sign bit of \a a as a bitmask*/ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr Packet psignbit(const Packet& a) { + return psignbit_impl::run(a); +} + +/** \internal \returns the 2-argument arc tangent of \a y and \a x (coeff-wise) */ +template ::value, int> = 0> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) { + return numext::atan2(y, x); +} + +/** \internal \returns the 2-argument arc tangent of \a y and \a x (coeff-wise) */ +template ::value, int> = 0> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet patan2(const Packet& y, const Packet& x) { + typedef typename internal::unpacket_traits::type Scalar; + + // See https://en.cppreference.com/w/cpp/numeric/math/atan2 + // for how corner cases are supposed to be handled according to the + // IEEE floating-point standard (IEC 60559). 
+ const Packet kSignMask = pset1(-Scalar(0)); + const Packet kZero = pzero(x); + const Packet kOne = pset1(Scalar(1)); + const Packet kPi = pset1(Scalar(EIGEN_PI)); + + const Packet x_has_signbit = psignbit(x); + const Packet y_signmask = pand(y, kSignMask); + const Packet x_signmask = pand(x, kSignMask); + const Packet result_signmask = pxor(y_signmask, x_signmask); + const Packet shift = por(pand(x_has_signbit, kPi), y_signmask); + + const Packet x_and_y_are_same = pcmp_eq(pabs(x), pabs(y)); + const Packet x_and_y_are_zero = pcmp_eq(por(x, y), kZero); + + Packet arg = pdiv(y, x); + arg = pselect(x_and_y_are_same, por(kOne, result_signmask), arg); + arg = pselect(x_and_y_are_zero, result_signmask, arg); + + Packet result = patan(arg); + result = padd(result, shift); + return result; +} + +/** \internal \returns the argument of \a a as a complex number */ +template ::value, int> = 0> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) { + return Packet(numext::arg(a)); +} + +/** \internal \returns the argument of \a a as a complex number */ +template ::value, int> = 0> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Packet pcarg(const Packet& a) { + EIGEN_STATIC_ASSERT(NumTraits::type>::IsComplex, + THIS METHOD IS FOR COMPLEX TYPES ONLY) + using RealPacket = typename unpacket_traits::as_real; + // a // r i r i ... + RealPacket aflip = pcplxflip(a).v; // i r i r ... + RealPacket result = patan2(aflip, a.v); // atan2 crap atan2 crap ... + return (Packet)pand(result, peven_mask(result)); // atan2 0 atan2 0 ... +} + +/** \internal \returns a packet populated with values in the range [begin, begin + count). Elements + * outside this range are not defined. 
\a *from does not need to be aligned, and can be null if \a count is zero.*/ +template +EIGEN_DEVICE_FUNC inline Packet ploaduSegment(const typename unpacket_traits::type* from, Index begin, + Index count) { + using Scalar = typename unpacket_traits::type; + constexpr Index PacketSize = unpacket_traits::size; + eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range"); + Scalar aux[PacketSize] = {}; + for (Index k = begin; k < begin + count; k++) { + aux[k] = from[k]; + } + return ploadu(aux); +} + +/** \internal \returns a packet populated with values in the range [begin, begin + count). Elements + * outside this range are not defined. \a *from must be aligned, and cannot be null.*/ +template +EIGEN_DEVICE_FUNC inline Packet ploadSegment(const typename unpacket_traits::type* from, Index begin, + Index count) { + return ploaduSegment(from, begin, count); +} + +/** \internal copy the packet \a from in the range [begin, begin + count) to \a *to. +Elements outside of the range [begin, begin + count) are not defined. \a *to does not need to be aligned, and can be +null if \a count is zero.*/ +template +EIGEN_DEVICE_FUNC inline void pstoreuSegment(Scalar* to, const Packet& from, Index begin, Index count) { + constexpr Index PacketSize = unpacket_traits::size; + eigen_assert((begin >= 0 && count >= 0 && begin + count <= PacketSize) && "invalid range"); + Scalar aux[PacketSize]; + pstoreu(aux, from); + for (Index k = begin; k < begin + count; k++) { + to[k] = aux[k]; + } +} + +/** \internal copy the packet \a from in the range [begin, begin + count) to \a *to. +Elements outside of the range [begin, begin + count) are not defined. \a *to must be aligned, and cannot be +null.*/ +template +EIGEN_DEVICE_FUNC inline void pstoreSegment(Scalar* to, const Packet& from, Index begin, Index count) { + return pstoreuSegment(to, from, begin, count); +} + +/** \internal \returns a packet populated with values in the range [begin, begin + count). 
Elements + * outside this range are not defined.*/ +template +EIGEN_DEVICE_FUNC inline Packet ploadtSegment(const typename unpacket_traits::type* from, Index begin, + Index count) { + constexpr int RequiredAlignment = unpacket_traits::alignment; + if (Alignment >= RequiredAlignment) { + return ploadSegment(from, begin, count); + } else { + return ploaduSegment(from, begin, count); + } +} + +/** \internal copy the packet \a from in the range [begin, begin + count) to \a *to. +Elements outside of the range [begin, begin + count) are not defined.*/ +template +EIGEN_DEVICE_FUNC inline void pstoretSegment(Scalar* to, const Packet& from, Index begin, Index count) { + constexpr int RequiredAlignment = unpacket_traits::alignment; + if (Alignment >= RequiredAlignment) { + pstoreSegment(to, from, begin, count); + } else { + pstoreuSegment(to, from, begin, count); + } +} + +#ifndef EIGEN_NO_IO + +template +class StreamablePacket { + public: + using Scalar = typename unpacket_traits::type; + StreamablePacket(const Packet& packet) { pstoreu(v_, packet); } + + friend std::ostream& operator<<(std::ostream& os, const StreamablePacket& packet) { + os << "{" << packet.v_[0]; + for (int i = 1; i < unpacket_traits::size; ++i) { + os << "," << packet.v_[i]; + } + os << "}"; + return os; + } + + private: + Scalar v_[unpacket_traits::size]; +}; + +/** + * \internal \returns an intermediary that can be used to ostream packets, e.g. for debugging. 
+ */ +template +StreamablePacket postream(const Packet& packet) { + return StreamablePacket(packet); +} + +#endif // EIGEN_NO_IO + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_GENERIC_PACKET_MATH_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/GlobalFunctions.h b/o-voxel/third_party/eigen/Eigen/src/Core/GlobalFunctions.h new file mode 100644 index 0000000000000000000000000000000000000000..e44b436def0a7cae7677189e62ed05794954e049 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/GlobalFunctions.h @@ -0,0 +1,230 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010-2016 Gael Guennebaud +// Copyright (C) 2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_GLOBAL_FUNCTIONS_H +#define EIGEN_GLOBAL_FUNCTIONS_H + +#ifdef EIGEN_PARSED_BY_DOXYGEN + +#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME, FUNCTOR, DOC_OP, DOC_DETAILS) \ + /** \returns an expression of the coefficient-wise DOC_OP of \a x \ + \ \ + DOC_DETAILS \ + \ \ + \sa Math functions, class CwiseUnaryOp \ + */ \ + template \ + inline const Eigen::CwiseUnaryOp, const Derived> NAME( \ + const Eigen::ArrayBase& x); + +#else + +#define EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(NAME, FUNCTOR, DOC_OP, DOC_DETAILS) \ + template \ + inline const Eigen::CwiseUnaryOp, const Derived>(NAME)( \ + const Eigen::ArrayBase& x) { \ + return Eigen::CwiseUnaryOp, const Derived>(x.derived()); \ + } + +#endif // EIGEN_PARSED_BY_DOXYGEN + +#define EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(NAME, FUNCTOR) \ + \ + template \ + struct NAME##_retval > { \ + typedef const Eigen::CwiseUnaryOp, const Derived> type; \ + }; \ + template \ + struct NAME##_impl > { \ + static inline typename NAME##_retval >::type run(const Eigen::ArrayBase& x) { \ + return 
typename NAME##_retval >::type(x.derived()); \ + } \ + }; + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(real, scalar_real_op, real part,\sa ArrayBase::real) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(imag, scalar_imag_op, imaginary part,\sa ArrayBase::imag) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(conj, scalar_conjugate_op, complex conjugate,\sa ArrayBase::conjugate) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(inverse, scalar_inverse_op, inverse,\sa ArrayBase::inverse) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sin, scalar_sin_op, sine,\sa ArrayBase::sin) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cos, scalar_cos_op, cosine,\sa ArrayBase::cos) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tan, scalar_tan_op, tangent,\sa ArrayBase::tan) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atan, scalar_atan_op, arc - tangent,\sa ArrayBase::atan) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asin, scalar_asin_op, arc - sine,\sa ArrayBase::asin) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acos, scalar_acos_op, arc - consine,\sa ArrayBase::acos) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sinh, scalar_sinh_op, hyperbolic sine,\sa ArrayBase::sinh) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cosh, scalar_cosh_op, hyperbolic cosine,\sa ArrayBase::cosh) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(tanh, scalar_tanh_op, hyperbolic tangent,\sa ArrayBase::tanh) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(asinh, scalar_asinh_op, inverse hyperbolic sine,\sa ArrayBase::asinh) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(acosh, scalar_acosh_op, inverse hyperbolic cosine,\sa ArrayBase::acosh) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(atanh, scalar_atanh_op, inverse hyperbolic tangent,\sa ArrayBase::atanh) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(logistic, scalar_logistic_op, logistic function,\sa ArrayBase::logistic) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(lgamma, scalar_lgamma_op, + natural logarithm of the gamma function,\sa ArrayBase::lgamma) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(digamma, scalar_digamma_op, derivative of lgamma,\sa ArrayBase::digamma) 
+EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erf, scalar_erf_op, error function,\sa ArrayBase::erf) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(erfc, scalar_erfc_op, complement error function,\sa ArrayBase::erfc) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(ndtri, scalar_ndtri_op, inverse normal distribution function,\sa ArrayBase::ndtri) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp, scalar_exp_op, exponential,\sa ArrayBase::exp) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(exp2, scalar_exp2_op, exponential,\sa ArrayBase::exp2) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(expm1, scalar_expm1_op, exponential of a value minus 1,\sa ArrayBase::expm1) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log, scalar_log_op, natural logarithm,\sa Eigen::log10 DOXCOMMA ArrayBase::log) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log1p, scalar_log1p_op, natural logarithm of 1 plus the value,\sa ArrayBase::log1p) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log10, scalar_log10_op, base 10 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log10) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(log2, scalar_log2_op, base 2 logarithm,\sa Eigen::log DOXCOMMA ArrayBase::log2) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs, scalar_abs_op, absolute value,\sa ArrayBase::abs DOXCOMMA MatrixBase::cwiseAbs) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(abs2, scalar_abs2_op, + squared absolute value,\sa ArrayBase::abs2 DOXCOMMA MatrixBase::cwiseAbs2) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(arg, scalar_arg_op, complex argument,\sa ArrayBase::arg DOXCOMMA MatrixBase::cwiseArg) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(carg, scalar_carg_op, + complex argument, \sa ArrayBase::carg DOXCOMMA MatrixBase::cwiseCArg) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sqrt, scalar_sqrt_op, square root,\sa ArrayBase::sqrt DOXCOMMA MatrixBase::cwiseSqrt) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cbrt, scalar_cbrt_op, cube root,\sa ArrayBase::cbrt DOXCOMMA MatrixBase::cwiseCbrt) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rsqrt, scalar_rsqrt_op, reciprocal square root,\sa ArrayBase::rsqrt) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(square, scalar_square_op, + square(power 2),\sa Eigen::abs2 
DOXCOMMA Eigen::pow DOXCOMMA ArrayBase::square) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(cube, scalar_cube_op, cube(power 3),\sa Eigen::pow DOXCOMMA ArrayBase::cube) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(rint, scalar_rint_op, + nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(round, scalar_round_op, + nearest integer,\sa Eigen::floor DOXCOMMA Eigen::ceil DOXCOMMA ArrayBase::round) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY( + floor, scalar_floor_op, nearest integer not greater than the given value,\sa Eigen::ceil DOXCOMMA ArrayBase::floor) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY( + ceil, scalar_ceil_op, nearest integer not less than the given value,\sa Eigen::floor DOXCOMMA ArrayBase::ceil) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(trunc, scalar_trunc_op, + nearest integer not greater in magnitude than the given value,\sa Eigen::trunc DOXCOMMA + ArrayBase::trunc) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY( + isnan, scalar_isnan_op, not -a - number test,\sa Eigen::isinf DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isnan) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY( + isinf, scalar_isinf_op, infinite value test,\sa Eigen::isnan DOXCOMMA Eigen::isfinite DOXCOMMA ArrayBase::isinf) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(isfinite, scalar_isfinite_op, + finite value test,\sa Eigen::isinf DOXCOMMA Eigen::isnan DOXCOMMA ArrayBase::isfinite) +EIGEN_ARRAY_DECLARE_GLOBAL_UNARY(sign, scalar_sign_op, sign(or 0),\sa ArrayBase::sign) + +template +using GlobalUnaryPowReturnType = std::enable_if_t< + !internal::is_arithmetic::Real>::value && + internal::is_arithmetic::Real>::value, + CwiseUnaryOp, const Derived> >; + +/** \returns an expression of the coefficient-wise power of \a x to the given constant \a exponent. + * + * \tparam ScalarExponent is the scalar type of \a exponent. It must be compatible with the scalar type of the given + * expression (\c Derived::Scalar). 
+ * + * \sa ArrayBase::pow() + * + * \relates ArrayBase + */ +#ifdef EIGEN_PARSED_BY_DOXYGEN +template +EIGEN_DEVICE_FUNC inline const GlobalUnaryPowReturnType pow(const Eigen::ArrayBase& x, + const ScalarExponent& exponent); +#else +template +EIGEN_DEVICE_FUNC inline const GlobalUnaryPowReturnType pow(const Eigen::ArrayBase& x, + const ScalarExponent& exponent) { + return GlobalUnaryPowReturnType( + x.derived(), internal::scalar_unary_pow_op(exponent)); +} +#endif + +/** \returns an expression of the coefficient-wise power of \a x to the given array of \a exponents. + * + * This function computes the coefficient-wise power. + * + * Example: \include Cwise_array_power_array.cpp + * Output: \verbinclude Cwise_array_power_array.out + * + * \sa ArrayBase::pow() + * + * \relates ArrayBase + */ +template +inline const Eigen::CwiseBinaryOp< + Eigen::internal::scalar_pow_op, const Derived, + const ExponentDerived> +pow(const Eigen::ArrayBase& x, const Eigen::ArrayBase& exponents) { + return Eigen::CwiseBinaryOp< + Eigen::internal::scalar_pow_op, const Derived, + const ExponentDerived>(x.derived(), exponents.derived()); +} + +/** \returns an expression of the coefficient-wise power of the scalar \a x to the given array of \a exponents. + * + * This function computes the coefficient-wise power between a scalar and an array of exponents. + * + * \tparam Scalar is the scalar type of \a x. It must be compatible with the scalar type of the given array expression + * (\c Derived::Scalar). 
+ * + * Example: \include Cwise_scalar_power_array.cpp + * Output: \verbinclude Cwise_scalar_power_array.out + * + * \sa ArrayBase::pow() + * + * \relates ArrayBase + */ +#ifdef EIGEN_PARSED_BY_DOXYGEN +template +inline const CwiseBinaryOp, Constant, Derived> pow( + const Scalar& x, const Eigen::ArrayBase& x); +#else +template +EIGEN_DEVICE_FUNC inline const EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE( + typename internal::promote_scalar_arg::type, + Derived, pow) pow(const Scalar& x, const Eigen::ArrayBase& exponents) { + typedef + typename internal::promote_scalar_arg::type + PromotedScalar; + return EIGEN_SCALAR_BINARYOP_EXPR_RETURN_TYPE(PromotedScalar, Derived, pow)( + typename internal::plain_constant_type::type( + exponents.derived().rows(), exponents.derived().cols(), internal::scalar_constant_op(x)), + exponents.derived()); +} +#endif + +/** \returns an expression of the coefficient-wise atan2(\a x, \a y). \a x and \a y must be of the same type. + * + * This function computes the coefficient-wise atan2(). + * + * \sa ArrayBase::atan2() + * + * \relates ArrayBase + */ +template +inline const std::enable_if_t< + std::is_same::value, + Eigen::CwiseBinaryOp, + const LhsDerived, const RhsDerived> > +atan2(const Eigen::ArrayBase& x, const Eigen::ArrayBase& exponents) { + return Eigen::CwiseBinaryOp< + Eigen::internal::scalar_atan2_op, const LhsDerived, + const RhsDerived>(x.derived(), exponents.derived()); +} + +namespace internal { +EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(real, scalar_real_op) +EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(imag, scalar_imag_op) +EIGEN_ARRAY_DECLARE_GLOBAL_EIGEN_UNARY(abs2, scalar_abs2_op) +} // namespace internal +} // namespace Eigen + +// TODO: cleanly disable those functions that are not supported on Array (numext::real_ref, internal::random, +// internal::isApprox...) 
+ +#endif // EIGEN_GLOBAL_FUNCTIONS_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/IO.h b/o-voxel/third_party/eigen/Eigen/src/Core/IO.h new file mode 100644 index 0000000000000000000000000000000000000000..25a05f1a0997088022859d26e2bcffee4949c5c7 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/IO.h @@ -0,0 +1,233 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_IO_H +#define EIGEN_IO_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +enum { DontAlignCols = 1 }; +enum { StreamPrecision = -1, FullPrecision = -2 }; + +namespace internal { +template +std::ostream& print_matrix(std::ostream& s, const Derived& _m, const IOFormat& fmt); +} + +/** \class IOFormat + * \ingroup Core_Module + * + * \brief Stores a set of parameters controlling the way matrices are printed + * + * List of available parameters: + * - \b precision number of digits for floating point values, or one of the special constants \c StreamPrecision and \c + * FullPrecision. The default is the special value \c StreamPrecision which means to use the stream's own precision + * setting, as set for instance using \c cout.precision(3). The other special value \c FullPrecision means that the + * number of digits will be computed to match the full precision of each floating-point type. + * - \b flags an OR-ed combination of flags, the default value is 0, the only currently available flag is \c + * DontAlignCols which allows to disable the alignment of columns, resulting in faster code. 
+ * - \b coeffSeparator string printed between two coefficients of the same row + * - \b rowSeparator string printed between two rows + * - \b rowPrefix string printed at the beginning of each row + * - \b rowSuffix string printed at the end of each row + * - \b matPrefix string printed at the beginning of the matrix + * - \b matSuffix string printed at the end of the matrix + * - \b fill character printed to fill the empty space in aligned columns + * + * Example: \include IOFormat.cpp + * Output: \verbinclude IOFormat.out + * + * \sa DenseBase::format(), class WithFormat + */ +struct IOFormat { + /** Default constructor, see class IOFormat for the meaning of the parameters */ + IOFormat(int _precision = StreamPrecision, int _flags = 0, const std::string& _coeffSeparator = " ", + const std::string& _rowSeparator = "\n", const std::string& _rowPrefix = "", + const std::string& _rowSuffix = "", const std::string& _matPrefix = "", const std::string& _matSuffix = "", + const char _fill = ' ') + : matPrefix(_matPrefix), + matSuffix(_matSuffix), + rowPrefix(_rowPrefix), + rowSuffix(_rowSuffix), + rowSeparator(_rowSeparator), + rowSpacer(""), + coeffSeparator(_coeffSeparator), + fill(_fill), + precision(_precision), + flags(_flags) { + // TODO check if rowPrefix, rowSuffix or rowSeparator contains a newline + // don't add rowSpacer if columns are not to be aligned + if ((flags & DontAlignCols)) return; + int i = int(matPrefix.length()) - 1; + while (i >= 0 && matPrefix[i] != '\n') { + rowSpacer += ' '; + i--; + } + } + std::string matPrefix, matSuffix; + std::string rowPrefix, rowSuffix, rowSeparator, rowSpacer; + std::string coeffSeparator; + char fill; + int precision; + int flags; +}; + +/** \class WithFormat + * \ingroup Core_Module + * + * \brief Pseudo expression providing matrix output with given format + * + * \tparam ExpressionType the type of the object on which IO stream operations are performed + * + * This class represents an expression with stream operators 
controlled by a given IOFormat. + * It is the return type of DenseBase::format() + * and most of the time this is the only way it is used. + * + * See class IOFormat for some examples. + * + * \sa DenseBase::format(), class IOFormat + */ +template +class WithFormat { + public: + WithFormat(const ExpressionType& matrix, const IOFormat& format) : m_matrix(matrix), m_format(format) {} + + friend std::ostream& operator<<(std::ostream& s, const WithFormat& wf) { + return internal::print_matrix(s, wf.m_matrix.eval(), wf.m_format); + } + + protected: + typename ExpressionType::Nested m_matrix; + IOFormat m_format; +}; + +namespace internal { + +// NOTE: This helper is kept for backward compatibility with previous code specializing +// this internal::significant_decimals_impl structure. In the future we should directly +// call max_digits10(). +template +struct significant_decimals_impl { + static inline int run() { return NumTraits::max_digits10(); } +}; + +/** \internal + * print the matrix \a _m to the output stream \a s using the output format \a fmt */ +template +std::ostream& print_matrix(std::ostream& s, const Derived& _m, const IOFormat& fmt) { + using internal::is_same; + + if (_m.size() == 0) { + s << fmt.matPrefix << fmt.matSuffix; + return s; + } + + typename Derived::Nested m = _m; + typedef typename Derived::Scalar Scalar; + typedef std::conditional_t::value || is_same::value || + is_same::value || is_same::value, + int, + std::conditional_t >::value || + is_same >::value || + is_same >::value || + is_same >::value, + std::complex, const Scalar&> > + PrintType; + + Index width = 0; + + std::streamsize explicit_precision; + if (fmt.precision == StreamPrecision) { + explicit_precision = 0; + } else if (fmt.precision == FullPrecision) { + if (NumTraits::IsInteger) { + explicit_precision = 0; + } else { + explicit_precision = significant_decimals_impl::run(); + } + } else { + explicit_precision = fmt.precision; + } + + std::streamsize old_precision = 0; + if 
(explicit_precision) old_precision = s.precision(explicit_precision); + + bool align_cols = !(fmt.flags & DontAlignCols); + if (align_cols) { + // compute the largest width + for (Index j = 0; j < m.cols(); ++j) + for (Index i = 0; i < m.rows(); ++i) { + std::stringstream sstr; + sstr.copyfmt(s); + sstr << static_cast(m.coeff(i, j)); + width = std::max(width, Index(sstr.str().length())); + } + } + std::streamsize old_width = s.width(); + char old_fill_character = s.fill(); + s << fmt.matPrefix; + for (Index i = 0; i < m.rows(); ++i) { + if (i) s << fmt.rowSpacer; + s << fmt.rowPrefix; + if (width) { + s.fill(fmt.fill); + s.width(width); + } + s << static_cast(m.coeff(i, 0)); + for (Index j = 1; j < m.cols(); ++j) { + s << fmt.coeffSeparator; + if (width) { + s.fill(fmt.fill); + s.width(width); + } + s << static_cast(m.coeff(i, j)); + } + s << fmt.rowSuffix; + if (i < m.rows() - 1) s << fmt.rowSeparator; + } + s << fmt.matSuffix; + if (explicit_precision) s.precision(old_precision); + if (width) { + s.fill(old_fill_character); + s.width(old_width); + } + return s; +} + +} // end namespace internal + +/** \relates DenseBase + * + * Outputs the matrix, to the given stream. + * + * If you wish to print the matrix with a format different than the default, use DenseBase::format(). + * + * It is also possible to change the default format by defining EIGEN_DEFAULT_IO_FORMAT before including Eigen headers. + * If not defined, this will automatically be defined to Eigen::IOFormat(), that is the Eigen::IOFormat with default + * parameters. 
+ * + * \sa DenseBase::format() + */ +template +std::ostream& operator<<(std::ostream& s, const DenseBase& m) { + return internal::print_matrix(s, m.eval(), EIGEN_DEFAULT_IO_FORMAT); +} + +template +std::ostream& operator<<(std::ostream& s, const DiagonalBase& m) { + return internal::print_matrix(s, m.derived(), EIGEN_DEFAULT_IO_FORMAT); +} + +} // end namespace Eigen + +#endif // EIGEN_IO_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/IndexedView.h b/o-voxel/third_party/eigen/Eigen/src/Core/IndexedView.h new file mode 100644 index 0000000000000000000000000000000000000000..531ead1f3d3686d8fc145db50741b79ca79b0acf --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/IndexedView.h @@ -0,0 +1,321 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2017 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_INDEXED_VIEW_H +#define EIGEN_INDEXED_VIEW_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template +struct traits> : traits { + enum { + RowsAtCompileTime = int(IndexedViewHelper::SizeAtCompileTime), + ColsAtCompileTime = int(IndexedViewHelper::SizeAtCompileTime), + MaxRowsAtCompileTime = RowsAtCompileTime, + MaxColsAtCompileTime = ColsAtCompileTime, + + XprTypeIsRowMajor = (int(traits::Flags) & RowMajorBit) != 0, + IsRowMajor = (MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1) ? 1 + : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1) ? 0 + : XprTypeIsRowMajor, + + RowIncr = int(IndexedViewHelper::IncrAtCompileTime), + ColIncr = int(IndexedViewHelper::IncrAtCompileTime), + InnerIncr = IsRowMajor ? ColIncr : RowIncr, + OuterIncr = IsRowMajor ? 
RowIncr : ColIncr, + + HasSameStorageOrderAsXprType = (IsRowMajor == XprTypeIsRowMajor), + XprInnerStride = HasSameStorageOrderAsXprType ? int(inner_stride_at_compile_time::ret) + : int(outer_stride_at_compile_time::ret), + XprOuterstride = HasSameStorageOrderAsXprType ? int(outer_stride_at_compile_time::ret) + : int(inner_stride_at_compile_time::ret), + + InnerSize = XprTypeIsRowMajor ? ColsAtCompileTime : RowsAtCompileTime, + IsBlockAlike = InnerIncr == 1 && OuterIncr == 1, + IsInnerPannel = HasSameStorageOrderAsXprType && + is_same, std::conditional_t>::value, + + InnerStrideAtCompileTime = + InnerIncr < 0 || InnerIncr == DynamicIndex || XprInnerStride == Dynamic || InnerIncr == Undefined + ? Dynamic + : XprInnerStride * InnerIncr, + OuterStrideAtCompileTime = + OuterIncr < 0 || OuterIncr == DynamicIndex || XprOuterstride == Dynamic || OuterIncr == Undefined + ? Dynamic + : XprOuterstride * OuterIncr, + + ReturnAsScalar = is_single_range::value && is_single_range::value, + ReturnAsBlock = (!ReturnAsScalar) && IsBlockAlike, + ReturnAsIndexedView = (!ReturnAsScalar) && (!ReturnAsBlock), + + // FIXME we deal with compile-time strides if and only if we have DirectAccessBit flag, + // but this is too strict regarding negative strides... + DirectAccessMask = (int(InnerIncr) != Undefined && int(OuterIncr) != Undefined && InnerIncr >= 0 && OuterIncr >= 0) + ? DirectAccessBit + : 0, + FlagsRowMajorBit = IsRowMajor ? RowMajorBit : 0, + FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, + FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? 
LinearAccessBit : 0, + Flags = (traits::Flags & (HereditaryBits | DirectAccessMask)) | FlagsLvalueBit | FlagsRowMajorBit | + FlagsLinearAccessBit + }; + + typedef Block BlockType; +}; + +template +class IndexedViewImpl; + +} // namespace internal + +/** \class IndexedView + * \ingroup Core_Module + * + * \brief Expression of a non-sequential sub-matrix defined by arbitrary sequences of row and column indices + * + * \tparam XprType the type of the expression in which we are taking the intersections of sub-rows and sub-columns + * \tparam RowIndices the type of the object defining the sequence of row indices + * \tparam ColIndices the type of the object defining the sequence of column indices + * + * This class represents an expression of a sub-matrix (or sub-vector) defined as the intersection + * of sub-sets of rows and columns, that are themself defined by generic sequences of row indices \f$ + * \{r_0,r_1,..r_{m-1}\} \f$ and column indices \f$ \{c_0,c_1,..c_{n-1} \}\f$. Let \f$ A \f$ be the nested matrix, then + * the resulting matrix \f$ B \f$ has \c m rows and \c n columns, and its entries are given by: \f$ B(i,j) = A(r_i,c_j) + * \f$. + * + * The \c RowIndices and \c ColIndices types must be compatible with the following API: + * \code + * operator[](Index) const; + * Index size() const; + * \endcode + * + * Typical supported types thus include: + * - std::vector + * - std::valarray + * - std::array + * - Eigen::ArrayXi + * - decltype(ArrayXi::LinSpaced(...)) + * - Any view/expressions of the previous types + * - Eigen::ArithmeticSequence + * - Eigen::internal::AllRange (helper for Eigen::placeholders::all) + * - Eigen::internal::SingleRange (helper for single index) + * - etc. + * + * In typical usages of %Eigen, this class should never be used directly. It is the return type of + * DenseBase::operator()(const RowIndices&, const ColIndices&). 
+ * + * \sa class Block + */ +template +class IndexedView + : public internal::IndexedViewImpl::StorageKind, + (internal::traits>::Flags & + DirectAccessBit) != 0> { + public: + typedef typename internal::IndexedViewImpl< + XprType, RowIndices, ColIndices, typename internal::traits::StorageKind, + (internal::traits>::Flags & DirectAccessBit) != 0> + Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(IndexedView) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedView) + + template + IndexedView(XprType& xpr, const T0& rowIndices, const T1& colIndices) : Base(xpr, rowIndices, colIndices) {} +}; + +namespace internal { + +// Generic API dispatcher +template +class IndexedViewImpl : public internal::generic_xpr_base>::type { + public: + typedef typename internal::generic_xpr_base>::type Base; + typedef typename internal::ref_selector::non_const_type MatrixTypeNested; + typedef internal::remove_all_t NestedExpression; + typedef typename XprType::Scalar Scalar; + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedViewImpl) + + template + IndexedViewImpl(XprType& xpr, const T0& rowIndices, const T1& colIndices) + : m_xpr(xpr), m_rowIndices(rowIndices), m_colIndices(colIndices) {} + + /** \returns number of rows */ + Index rows() const { return IndexedViewHelper::size(m_rowIndices); } + + /** \returns number of columns */ + Index cols() const { return IndexedViewHelper::size(m_colIndices); } + + /** \returns the nested expression */ + const internal::remove_all_t& nestedExpression() const { return m_xpr; } + + /** \returns the nested expression */ + std::remove_reference_t& nestedExpression() { return m_xpr; } + + /** \returns a const reference to the object storing/generating the row indices */ + const RowIndices& rowIndices() const { return m_rowIndices; } + + /** \returns a const reference to the object storing/generating the column indices */ + const ColIndices& colIndices() const { return m_colIndices; } + + constexpr Scalar& coeffRef(Index rowId, Index colId) { + return 
nestedExpression().coeffRef(m_rowIndices[rowId], m_colIndices[colId]); + } + + constexpr const Scalar& coeffRef(Index rowId, Index colId) const { + return nestedExpression().coeffRef(m_rowIndices[rowId], m_colIndices[colId]); + } + + protected: + MatrixTypeNested m_xpr; + RowIndices m_rowIndices; + ColIndices m_colIndices; +}; + +template +class IndexedViewImpl + : public IndexedViewImpl { + public: + using Base = internal::IndexedViewImpl::StorageKind, false>; + using Derived = IndexedView; + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(IndexedViewImpl) + + template + IndexedViewImpl(XprType& xpr, const T0& rowIndices, const T1& colIndices) : Base(xpr, rowIndices, colIndices) {} + + Index rowIncrement() const { + if (traits::RowIncr != DynamicIndex && traits::RowIncr != Undefined) { + return traits::RowIncr; + } + return IndexedViewHelper::incr(this->rowIndices()); + } + Index colIncrement() const { + if (traits::ColIncr != DynamicIndex && traits::ColIncr != Undefined) { + return traits::ColIncr; + } + return IndexedViewHelper::incr(this->colIndices()); + } + + Index innerIncrement() const { return traits::IsRowMajor ? colIncrement() : rowIncrement(); } + + Index outerIncrement() const { return traits::IsRowMajor ? 
rowIncrement() : colIncrement(); } + + std::decay_t* data() { + Index row_offset = this->rowIndices()[0] * this->nestedExpression().rowStride(); + Index col_offset = this->colIndices()[0] * this->nestedExpression().colStride(); + return this->nestedExpression().data() + row_offset + col_offset; + } + + const std::decay_t* data() const { + Index row_offset = this->rowIndices()[0] * this->nestedExpression().rowStride(); + Index col_offset = this->colIndices()[0] * this->nestedExpression().colStride(); + return this->nestedExpression().data() + row_offset + col_offset; + } + + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { + if (traits::InnerStrideAtCompileTime != Dynamic) { + return traits::InnerStrideAtCompileTime; + } + return innerIncrement() * this->nestedExpression().innerStride(); + } + + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { + if (traits::OuterStrideAtCompileTime != Dynamic) { + return traits::OuterStrideAtCompileTime; + } + return outerIncrement() * this->nestedExpression().outerStride(); + } +}; + +template +struct unary_evaluator, IndexBased> + : evaluator_base> { + typedef IndexedView XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost /* TODO + cost of row/col index */, + + FlagsLinearAccessBit = + (traits::RowsAtCompileTime == 1 || traits::ColsAtCompileTime == 1) ? 
LinearAccessBit : 0, + + FlagsRowMajorBit = traits::FlagsRowMajorBit, + + Flags = (evaluator::Flags & (HereditaryBits & ~RowMajorBit /*| LinearAccessBit | DirectAccessBit*/)) | + FlagsLinearAccessBit | FlagsRowMajorBit, + + Alignment = 0 + }; + + EIGEN_DEVICE_FUNC explicit unary_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_xpr(xpr) { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() && + m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols()); + return m_argImpl.coeff(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { + eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() && + m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols()); + return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + EIGEN_STATIC_ASSERT_LVALUE(XprType) + Index row = XprType::RowsAtCompileTime == 1 ? 0 : index; + Index col = XprType::RowsAtCompileTime == 1 ? index : 0; + eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() && + m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols()); + return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const { + Index row = XprType::RowsAtCompileTime == 1 ? 0 : index; + Index col = XprType::RowsAtCompileTime == 1 ? 
index : 0; + eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() && + m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols()); + return m_argImpl.coeffRef(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index index) const { + Index row = XprType::RowsAtCompileTime == 1 ? 0 : index; + Index col = XprType::RowsAtCompileTime == 1 ? index : 0; + eigen_assert(m_xpr.rowIndices()[row] >= 0 && m_xpr.rowIndices()[row] < m_xpr.nestedExpression().rows() && + m_xpr.colIndices()[col] >= 0 && m_xpr.colIndices()[col] < m_xpr.nestedExpression().cols()); + return m_argImpl.coeff(m_xpr.rowIndices()[row], m_xpr.colIndices()[col]); + } + + protected: + evaluator m_argImpl; + const XprType& m_xpr; +}; + +// Catch assignments to an IndexedView. +template +struct evaluator_assume_aliasing> { + static const bool value = true; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_INDEXED_VIEW_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/InnerProduct.h b/o-voxel/third_party/eigen/Eigen/src/Core/InnerProduct.h new file mode 100644 index 0000000000000000000000000000000000000000..85609fd24f7f59fc3a160b30aa6361d75f867b54 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/InnerProduct.h @@ -0,0 +1,260 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2024 Charlie Schlosser +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_INNER_PRODUCT_EVAL_H +#define EIGEN_INNER_PRODUCT_EVAL_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +// recursively searches for the largest simd type that does not exceed Size, or the smallest if no such type exists +template ::type, + bool Stop = + (unpacket_traits::size <= Size) || is_same::half>::value> +struct find_inner_product_packet_helper; + +template +struct find_inner_product_packet_helper { + using type = typename find_inner_product_packet_helper::half>::type; +}; + +template +struct find_inner_product_packet_helper { + using type = Packet; +}; + +template +struct find_inner_product_packet : find_inner_product_packet_helper {}; + +template +struct find_inner_product_packet { + using type = typename packet_traits::type; +}; + +template +struct inner_product_assert { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Lhs) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Rhs) + EIGEN_STATIC_ASSERT_SAME_VECTOR_SIZE(Lhs, Rhs) +#ifndef EIGEN_NO_DEBUG + static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, const Rhs& rhs) { + eigen_assert((lhs.size() == rhs.size()) && "Inner product: lhs and rhs vectors must have same size"); + } +#else + static EIGEN_DEVICE_FUNC void run(const Lhs&, const Rhs&) {} +#endif +}; + +template +struct inner_product_evaluator { + static constexpr int LhsFlags = evaluator::Flags; + static constexpr int RhsFlags = evaluator::Flags; + static constexpr int SizeAtCompileTime = size_prefer_fixed(Lhs::SizeAtCompileTime, Rhs::SizeAtCompileTime); + static constexpr int MaxSizeAtCompileTime = + min_size_prefer_fixed(Lhs::MaxSizeAtCompileTime, Rhs::MaxSizeAtCompileTime); + static constexpr int LhsAlignment = evaluator::Alignment; + static constexpr int RhsAlignment = evaluator::Alignment; + + using Scalar = typename Func::result_type; + using Packet = typename find_inner_product_packet::type; + + static constexpr bool Vectorize = + bool(LhsFlags & RhsFlags & PacketAccessBit) && Func::PacketAccess 
&& + ((MaxSizeAtCompileTime == Dynamic) || (unpacket_traits::size <= MaxSizeAtCompileTime)); + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit inner_product_evaluator(const Lhs& lhs, const Rhs& rhs, + Func func = Func()) + : m_func(func), m_lhs(lhs), m_rhs(rhs), m_size(lhs.size()) { + inner_product_assert::run(lhs, rhs); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index size() const { return m_size.value(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index index) const { + return m_func.coeff(m_lhs.coeff(index), m_rhs.coeff(index)); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& value, Index index) const { + return m_func.coeff(value, m_lhs.coeff(index), m_rhs.coeff(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + return m_func.packet(m_lhs.template packet(index), + m_rhs.template packet(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(const PacketType& value, Index index) const { + return m_func.packet(value, m_lhs.template packet(index), + m_rhs.template packet(index)); + } + + const Func m_func; + const evaluator m_lhs; + const evaluator m_rhs; + const variable_if_dynamic m_size; +}; + +template +struct inner_product_impl; + +// scalar loop +template +struct inner_product_impl { + using Scalar = typename Evaluator::Scalar; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval) { + const Index size = eval.size(); + if (size == 0) return Scalar(0); + + Scalar result = eval.coeff(0); + for (Index k = 1; k < size; k++) { + result = eval.coeff(result, k); + } + + return result; + } +}; + +// vector loop +template +struct inner_product_impl { + using UnsignedIndex = std::make_unsigned_t; + using Scalar = typename Evaluator::Scalar; + using Packet = typename Evaluator::Packet; + static constexpr int PacketSize = unpacket_traits::size; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Evaluator& 
eval) { + const UnsignedIndex size = static_cast(eval.size()); + if (size < PacketSize) return inner_product_impl::run(eval); + + const UnsignedIndex packetEnd = numext::round_down(size, PacketSize); + const UnsignedIndex quadEnd = numext::round_down(size, 4 * PacketSize); + const UnsignedIndex numPackets = size / PacketSize; + const UnsignedIndex numRemPackets = (packetEnd - quadEnd) / PacketSize; + + Packet presult0, presult1, presult2, presult3; + + presult0 = eval.template packet(0 * PacketSize); + if (numPackets >= 2) presult1 = eval.template packet(1 * PacketSize); + if (numPackets >= 3) presult2 = eval.template packet(2 * PacketSize); + if (numPackets >= 4) { + presult3 = eval.template packet(3 * PacketSize); + + for (UnsignedIndex k = 4 * PacketSize; k < quadEnd; k += 4 * PacketSize) { + presult0 = eval.packet(presult0, k + 0 * PacketSize); + presult1 = eval.packet(presult1, k + 1 * PacketSize); + presult2 = eval.packet(presult2, k + 2 * PacketSize); + presult3 = eval.packet(presult3, k + 3 * PacketSize); + } + + if (numRemPackets >= 1) presult0 = eval.packet(presult0, quadEnd + 0 * PacketSize); + if (numRemPackets >= 2) presult1 = eval.packet(presult1, quadEnd + 1 * PacketSize); + if (numRemPackets == 3) presult2 = eval.packet(presult2, quadEnd + 2 * PacketSize); + + presult2 = padd(presult2, presult3); + } + + if (numPackets >= 3) presult1 = padd(presult1, presult2); + if (numPackets >= 2) presult0 = padd(presult0, presult1); + + Scalar result = predux(presult0); + for (UnsignedIndex k = packetEnd; k < size; k++) { + result = eval.coeff(result, k); + } + + return result; + } +}; + +template +struct conditional_conj; + +template +struct conditional_conj { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a) { return numext::conj(a); } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& a) { + return pconj(a); + } +}; + +template +struct conditional_conj { + static EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a) { return a; } + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& a) { + return a; + } +}; + +template +struct scalar_inner_product_op { + using result_type = typename ScalarBinaryOpTraits::ReturnType; + using conj_helper = conditional_conj; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type coeff(const LhsScalar& a, const RhsScalar& b) const { + return (conj_helper::coeff(a) * b); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type coeff(const result_type& accum, const LhsScalar& a, + const RhsScalar& b) const { + return (conj_helper::coeff(a) * b) + accum; + } + static constexpr bool PacketAccess = false; +}; + +// Partial specialization for packet access if and only if +// LhsScalar == RhsScalar == ScalarBinaryOpTraits::ReturnType. +template +struct scalar_inner_product_op< + Scalar, + typename std::enable_if::ReturnType, Scalar>::value, + Scalar>::type, + Conj> { + using result_type = Scalar; + using conj_helper = conditional_conj; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& a, const Scalar& b) const { + return pmul(conj_helper::coeff(a), b); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(const Scalar& accum, const Scalar& a, const Scalar& b) const { + return pmadd(conj_helper::coeff(a), b, accum); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& a, const Packet& b) const { + return pmul(conj_helper::packet(a), b); + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packet(const Packet& accum, const Packet& a, const Packet& b) const { + return pmadd(conj_helper::packet(a), b, accum); + } + static constexpr bool PacketAccess = packet_traits::HasMul && packet_traits::HasAdd; +}; + +template +struct default_inner_product_impl { + using LhsScalar = typename traits::Scalar; + using RhsScalar = typename traits::Scalar; + using Op = scalar_inner_product_op; + using Evaluator = inner_product_evaluator; + 
using result_type = typename Evaluator::Scalar; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE result_type run(const MatrixBase& a, const MatrixBase& b) { + Evaluator eval(a.derived(), b.derived(), Op()); + return inner_product_impl::run(eval); + } +}; + +template +struct dot_impl : default_inner_product_impl {}; + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_INNER_PRODUCT_EVAL_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/InternalHeaderCheck.h b/o-voxel/third_party/eigen/Eigen/src/Core/InternalHeaderCheck.h new file mode 100644 index 0000000000000000000000000000000000000000..97bf4567601b0b0934606c9ef26842293a43d3ad --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/InternalHeaderCheck.h @@ -0,0 +1,3 @@ +#ifndef EIGEN_CORE_MODULE_H +#error "Please include Eigen/Core instead of including headers inside the src directory directly." +#endif diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Inverse.h b/o-voxel/third_party/eigen/Eigen/src/Core/Inverse.h new file mode 100644 index 0000000000000000000000000000000000000000..d757fce65b6c0f91c42982a9142b9810db05e2a8 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Inverse.h @@ -0,0 +1,108 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014-2019 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_INVERSE_H +#define EIGEN_INVERSE_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +template +class InverseImpl; + +namespace internal { + +template +struct traits > : traits { + typedef typename XprType::PlainObject PlainObject; + typedef traits BaseTraits; + enum { Flags = BaseTraits::Flags & RowMajorBit }; +}; + +} // end namespace internal + +/** \class Inverse + * + * \brief Expression of the inverse of another expression + * + * \tparam XprType the type of the expression we are taking the inverse + * + * This class represents an abstract expression of A.inverse() + * and most of the time this is the only way it is used. + * + */ +template +class Inverse : public InverseImpl::StorageKind> { + public: + typedef typename XprType::StorageIndex StorageIndex; + typedef typename XprType::Scalar Scalar; + typedef typename internal::ref_selector::type XprTypeNested; + typedef internal::remove_all_t XprTypeNestedCleaned; + typedef typename internal::ref_selector::type Nested; + typedef internal::remove_all_t NestedExpression; + + explicit EIGEN_DEVICE_FUNC Inverse(const XprType& xpr) : m_xpr(xpr) {} + + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_xpr.cols(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_xpr.rows(); } + + EIGEN_DEVICE_FUNC const XprTypeNestedCleaned& nestedExpression() const { return m_xpr; } + + protected: + XprTypeNested m_xpr; +}; + +// Generic API dispatcher +template +class InverseImpl : public internal::generic_xpr_base >::type { + public: + typedef typename internal::generic_xpr_base >::type Base; + typedef typename XprType::Scalar Scalar; + + private: + Scalar coeff(Index row, Index col) const; + Scalar coeff(Index i) const; +}; + +namespace internal { + +/** \internal + * \brief Default evaluator for Inverse expression. 
+ * + * This default evaluator for Inverse expression simply evaluate the inverse into a temporary + * by a call to internal::call_assignment_no_alias. + * Therefore, inverse implementers only have to specialize Assignment, ...> for + * there own nested expression. + * + * \sa class Inverse + */ +template +struct unary_evaluator > : public evaluator::PlainObject> { + typedef Inverse InverseType; + typedef typename InverseType::PlainObject PlainObject; + typedef evaluator Base; + + enum { Flags = Base::Flags | EvalBeforeNestingBit }; + + EIGEN_DEVICE_FUNC unary_evaluator(const InverseType& inv_xpr) : m_result(inv_xpr.rows(), inv_xpr.cols()) { + internal::construct_at(this, m_result); + internal::call_assignment_no_alias(m_result, inv_xpr); + } + + protected: + PlainObject m_result; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_INVERSE_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Map.h b/o-voxel/third_party/eigen/Eigen/src/Core/Map.h new file mode 100644 index 0000000000000000000000000000000000000000..555ce0be45be6d1d3e5be542d0e1b34b6faeaf3b --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Map.h @@ -0,0 +1,153 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007-2010 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MAP_H +#define EIGEN_MAP_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { +template +struct traits > : public traits { + typedef traits TraitsBase; + enum { + PlainObjectTypeInnerSize = ((traits::Flags & RowMajorBit) == RowMajorBit) + ? 
PlainObjectType::ColsAtCompileTime + : PlainObjectType::RowsAtCompileTime, + + InnerStrideAtCompileTime = StrideType::InnerStrideAtCompileTime == 0 + ? int(PlainObjectType::InnerStrideAtCompileTime) + : int(StrideType::InnerStrideAtCompileTime), + OuterStrideAtCompileTime = StrideType::OuterStrideAtCompileTime == 0 + ? (InnerStrideAtCompileTime == Dynamic || PlainObjectTypeInnerSize == Dynamic + ? Dynamic + : int(InnerStrideAtCompileTime) * int(PlainObjectTypeInnerSize)) + : int(StrideType::OuterStrideAtCompileTime), + Alignment = int(MapOptions) & int(AlignedMask), + Flags0 = TraitsBase::Flags & (~NestByRefBit), + Flags = is_lvalue::value ? int(Flags0) : (int(Flags0) & ~LvalueBit) + }; + + private: + enum { Options }; // Expressions don't have Options +}; +} // namespace internal + +/** \class Map + * \ingroup Core_Module + * + * \brief A matrix or vector expression mapping an existing array of data. + * + * \tparam PlainObjectType the equivalent matrix type of the mapped data + * \tparam MapOptions specifies the pointer alignment in bytes. It can be: \c #Aligned128, \c #Aligned64, \c #Aligned32, + * \c #Aligned16, \c #Aligned8 or \c #Unaligned. The default is \c #Unaligned. \tparam StrideType optionally specifies + * strides. By default, Map assumes the memory layout of an ordinary, contiguous array. This can be overridden by + * specifying strides. The type passed here must be a specialization of the Stride template, see examples below. + * + * This class represents a matrix or vector expression mapping an existing array of data. + * It can be used to let Eigen interface without any overhead with non-Eigen data structures, + * such as plain C arrays or structures from other libraries. By default, it assumes that the + * data is laid out contiguously in memory. You can however override this by explicitly specifying + * inner and outer strides. 
+ * + * Here's an example of simply mapping a contiguous array as a \ref TopicStorageOrders "column-major" matrix: + * \include Map_simple.cpp + * Output: \verbinclude Map_simple.out + * + * If you need to map non-contiguous arrays, you can do so by specifying strides: + * + * Here's an example of mapping an array as a vector, specifying an inner stride, that is, the pointer + * increment between two consecutive coefficients. Here, we're specifying the inner stride as a compile-time + * fixed value. + * \include Map_inner_stride.cpp + * Output: \verbinclude Map_inner_stride.out + * + * Here's an example of mapping an array while specifying an outer stride. Here, since we're mapping + * as a column-major matrix, 'outer stride' means the pointer increment between two consecutive columns. + * Here, we're specifying the outer stride as a runtime parameter. Note that here \c OuterStride<> is + * a short version of \c OuterStride because the default template parameter of OuterStride + * is \c Dynamic + * \include Map_outer_stride.cpp + * Output: \verbinclude Map_outer_stride.out + * + * For more details and for an example of specifying both an inner and an outer stride, see class Stride. + * + * \b Tip: to change the array of data mapped by a Map object, you can use the C++ + * placement new syntax: + * + * Example: \include Map_placement_new.cpp + * Output: \verbinclude Map_placement_new.out + * + * This class is the return type of PlainObjectBase::Map() but can also be used directly. + * + * \sa PlainObjectBase::Map(), \ref TopicStorageOrders + */ +template +class Map : public MapBase > { + public: + typedef MapBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Map) + + typedef typename Base::PointerType PointerType; + typedef PointerType PointerArgType; + EIGEN_DEVICE_FUNC inline PointerType cast_to_pointer_type(PointerArgType ptr) { return ptr; } + + EIGEN_DEVICE_FUNC constexpr Index innerStride() const { + return StrideType::InnerStrideAtCompileTime != 0 ? 
m_stride.inner() : 1; + } + + EIGEN_DEVICE_FUNC constexpr Index outerStride() const { + return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() + : internal::traits::OuterStrideAtCompileTime != Dynamic + ? Index(internal::traits::OuterStrideAtCompileTime) + : IsVectorAtCompileTime ? (this->size() * innerStride()) + : int(Flags) & RowMajorBit ? (this->cols() * innerStride()) + : (this->rows() * innerStride()); + } + + /** Constructor in the fixed-size case. + * + * \param dataPtr pointer to the array to map + * \param stride optional Stride object, passing the strides. + */ + EIGEN_DEVICE_FUNC explicit inline Map(PointerArgType dataPtr, const StrideType& stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr)), m_stride(stride) {} + + /** Constructor in the dynamic-size vector case. + * + * \param dataPtr pointer to the array to map + * \param size the size of the vector expression + * \param stride optional Stride object, passing the strides. + */ + EIGEN_DEVICE_FUNC inline Map(PointerArgType dataPtr, Index size, const StrideType& stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr), size), m_stride(stride) {} + + /** Constructor in the dynamic-size matrix case. + * + * \param dataPtr pointer to the array to map + * \param rows the number of rows of the matrix expression + * \param cols the number of columns of the matrix expression + * \param stride optional Stride object, passing the strides. 
+ */ + EIGEN_DEVICE_FUNC inline Map(PointerArgType dataPtr, Index rows, Index cols, const StrideType& stride = StrideType()) + : Base(cast_to_pointer_type(dataPtr), rows, cols), m_stride(stride) {} + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Map) + + protected: + StrideType m_stride; +}; + +} // end namespace Eigen + +#endif // EIGEN_MAP_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/MapBase.h b/o-voxel/third_party/eigen/Eigen/src/Core/MapBase.h new file mode 100644 index 0000000000000000000000000000000000000000..31989fbdf2f32743048c2421355b9f6350498ea2 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/MapBase.h @@ -0,0 +1,283 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2007-2010 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MAPBASE_H +#define EIGEN_MAPBASE_H + +#define EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) \ + EIGEN_STATIC_ASSERT((int(internal::evaluator::Flags) & LinearAccessBit) || Derived::IsVectorAtCompileTime, \ + YOU_ARE_TRYING_TO_USE_AN_INDEX_BASED_ACCESSOR_ON_AN_EXPRESSION_THAT_DOES_NOT_SUPPORT_THAT) + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +/** \ingroup Core_Module + * + * \brief Base class for dense Map and Block expression with direct access + * + * This base class provides the const low-level accessors (e.g. coeff, coeffRef) of dense + * Map and Block objects with direct access. + * Typical users do not have to directly deal with this class. + * + * This class can be extended by through the macro plugin \c EIGEN_MAPBASE_PLUGIN. + * See \link TopicCustomizing_Plugins customizing Eigen \endlink for details. 
+ * + * The \c Derived class has to provide the following two methods describing the memory layout: + * \code Index innerStride() const; \endcode + * \code Index outerStride() const; \endcode + * + * \sa class Map, class Block + */ +template +class MapBase : public internal::dense_xpr_base::type { + public: + typedef typename internal::dense_xpr_base::type Base; + enum { + RowsAtCompileTime = internal::traits::RowsAtCompileTime, + ColsAtCompileTime = internal::traits::ColsAtCompileTime, + InnerStrideAtCompileTime = internal::traits::InnerStrideAtCompileTime, + SizeAtCompileTime = Base::SizeAtCompileTime + }; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + typedef std::conditional_t::value), Scalar*, const Scalar*> PointerType; + + using Base::derived; + // using Base::RowsAtCompileTime; + // using Base::ColsAtCompileTime; + // using Base::SizeAtCompileTime; + using Base::Flags; + using Base::IsRowMajor; + using Base::IsVectorAtCompileTime; + using Base::MaxColsAtCompileTime; + using Base::MaxRowsAtCompileTime; + using Base::MaxSizeAtCompileTime; + + using Base::coeff; + using Base::coeffRef; + using Base::cols; + using Base::eval; + using Base::lazyAssign; + using Base::rows; + using Base::size; + + using Base::colStride; + using Base::innerStride; + using Base::outerStride; + using Base::rowStride; + + // bug 217 - compile error on ICC 11.1 + using Base::operator=; + + typedef typename Base::CoeffReturnType CoeffReturnType; + + /** \copydoc DenseBase::rows() */ + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_rows.value(); } + /** \copydoc DenseBase::cols() */ + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_cols.value(); } + + /** Returns a pointer to the first coefficient of the matrix or vector. 
+ * + * \note When addressing this data, make sure to honor the strides returned by innerStride() and outerStride(). + * + * \sa innerStride(), outerStride() + */ + EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_data; } + + /** \copydoc PlainObjectBase::coeff(Index,Index) const */ + EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index rowId, Index colId) const { + return m_data[colId * colStride() + rowId * rowStride()]; + } + + /** \copydoc PlainObjectBase::coeff(Index) const */ + EIGEN_DEVICE_FUNC inline const Scalar& coeff(Index index) const { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + return m_data[index * innerStride()]; + } + + /** \copydoc PlainObjectBase::coeffRef(Index,Index) const */ + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { + return this->m_data[colId * colStride() + rowId * rowStride()]; + } + + /** \copydoc PlainObjectBase::coeffRef(Index) const */ + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + return this->m_data[index * innerStride()]; + } + + /** \internal */ + template + inline PacketScalar packet(Index rowId, Index colId) const { + return internal::ploadt(m_data + (colId * colStride() + rowId * rowStride())); + } + + /** \internal */ + template + inline PacketScalar packet(Index index) const { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + return internal::ploadt(m_data + index * innerStride()); + } + + /** \internal Constructor for fixed size matrices or vectors */ + EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) + : m_data(dataPtr), m_rows(RowsAtCompileTime), m_cols(ColsAtCompileTime) { + EIGEN_STATIC_ASSERT_FIXED_SIZE(Derived) + checkSanity(); + } + + /** \internal Constructor for dynamically sized vectors */ + EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) + : m_data(dataPtr), + m_rows(RowsAtCompileTime == Dynamic ? 
vecSize : Index(RowsAtCompileTime)), + m_cols(ColsAtCompileTime == Dynamic ? vecSize : Index(ColsAtCompileTime)) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived) + eigen_assert(vecSize >= 0); + eigen_assert(dataPtr == 0 || SizeAtCompileTime == Dynamic || SizeAtCompileTime == vecSize); + checkSanity(); + } + + /** \internal Constructor for dynamically sized matrices */ + EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) + : m_data(dataPtr), m_rows(rows), m_cols(cols) { + eigen_assert((dataPtr == 0) || (rows >= 0 && (RowsAtCompileTime == Dynamic || RowsAtCompileTime == rows) && + cols >= 0 && (ColsAtCompileTime == Dynamic || ColsAtCompileTime == cols))); + checkSanity(); + } + +#ifdef EIGEN_MAPBASE_PLUGIN +#include EIGEN_MAPBASE_PLUGIN +#endif + + protected: + EIGEN_DEFAULT_COPY_CONSTRUCTOR(MapBase) + EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(MapBase) + + template + EIGEN_DEVICE_FUNC void checkSanity(std::enable_if_t<(internal::traits::Alignment > 0), void*> = 0) const { +// Temporary macro to allow scalars to not be properly aligned. This is while we sort out failures +// in TensorFlow Lite that are currently relying on this UB. +#ifndef EIGEN_ALLOW_UNALIGNED_SCALARS + // Pointer must be aligned to the Scalar type, otherwise we get UB. + eigen_assert((std::uintptr_t(m_data) % alignof(Scalar) == 0) && "data is not scalar-aligned"); +#endif +#if EIGEN_MAX_ALIGN_BYTES > 0 + // innerStride() is not set yet when this function is called, so we optimistically assume the lowest plausible + // value: + const Index minInnerStride = InnerStrideAtCompileTime == Dynamic ? 
1 : Index(InnerStrideAtCompileTime); + EIGEN_ONLY_USED_FOR_DEBUG(minInnerStride); + eigen_assert((((std::uintptr_t(m_data) % internal::traits::Alignment) == 0) || + (cols() * rows() * minInnerStride * sizeof(Scalar)) < internal::traits::Alignment) && + "data is not aligned"); +#endif + } + + template + EIGEN_DEVICE_FUNC void checkSanity(std::enable_if_t::Alignment == 0, void*> = 0) const { +#ifndef EIGEN_ALLOW_UNALIGNED_SCALARS + // Pointer must be aligned to the Scalar type, otherwise we get UB. + eigen_assert((std::uintptr_t(m_data) % alignof(Scalar) == 0) && "data is not scalar-aligned"); +#endif + } + + PointerType m_data; + const internal::variable_if_dynamic m_rows; + const internal::variable_if_dynamic m_cols; +}; + +/** \ingroup Core_Module + * + * \brief Base class for non-const dense Map and Block expression with direct access + * + * This base class provides the non-const low-level accessors (e.g. coeff and coeffRef) of + * dense Map and Block objects with direct access. + * It inherits MapBase which defines the const variant for reading specific entries. 
+ * + * \sa class Map, class Block + */ +template +class MapBase : public MapBase { + typedef MapBase ReadOnlyMapBase; + + public: + typedef MapBase Base; + + typedef typename Base::Scalar Scalar; + typedef typename Base::PacketScalar PacketScalar; + typedef typename Base::StorageIndex StorageIndex; + typedef typename Base::PointerType PointerType; + + using Base::coeff; + using Base::coeffRef; + using Base::cols; + using Base::derived; + using Base::rows; + using Base::size; + + using Base::colStride; + using Base::innerStride; + using Base::outerStride; + using Base::rowStride; + + typedef std::conditional_t::value, Scalar, const Scalar> ScalarWithConstIfNotLvalue; + + EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return this->m_data; } + EIGEN_DEVICE_FUNC constexpr ScalarWithConstIfNotLvalue* data() { + return this->m_data; + } // no const-cast here so non-const-correct code will give a compile error + + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue& coeffRef(Index row, Index col) { + return this->m_data[col * colStride() + row * rowStride()]; + } + + EIGEN_DEVICE_FUNC inline ScalarWithConstIfNotLvalue& coeffRef(Index index) { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + return this->m_data[index * innerStride()]; + } + + template + inline void writePacket(Index row, Index col, const PacketScalar& val) { + internal::pstoret(this->m_data + (col * colStride() + row * rowStride()), val); + } + + template + inline void writePacket(Index index, const PacketScalar& val) { + EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS(Derived) + internal::pstoret(this->m_data + index * innerStride(), val); + } + + EIGEN_DEVICE_FUNC explicit inline MapBase(PointerType dataPtr) : Base(dataPtr) {} + EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index vecSize) : Base(dataPtr, vecSize) {} + EIGEN_DEVICE_FUNC inline MapBase(PointerType dataPtr, Index rows, Index cols) : Base(dataPtr, rows, cols) {} + + EIGEN_DEVICE_FUNC Derived& operator=(const MapBase& other) { + 
ReadOnlyMapBase::Base::operator=(other); + return derived(); + } + + // In theory we could simply refer to Base:Base::operator=, but MSVC does not like Base::Base, + // see bugs 821 and 920. + using ReadOnlyMapBase::Base::operator=; + + protected: + EIGEN_DEFAULT_COPY_CONSTRUCTOR(MapBase) + EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(MapBase) +}; + +#undef EIGEN_STATIC_ASSERT_INDEX_BASED_ACCESS + +} // end namespace Eigen + +#endif // EIGEN_MAPBASE_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/MathFunctions.h b/o-voxel/third_party/eigen/Eigen/src/Core/MathFunctions.h new file mode 100644 index 0000000000000000000000000000000000000000..5365c0a5fccb21aaf94a24a71a1f8937a1106df2 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/MathFunctions.h @@ -0,0 +1,2109 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_MATHFUNCTIONS_H +#define EIGEN_MATHFUNCTIONS_H + +// TODO this should better be moved to NumTraits +// Source: WolframAlpha +#define EIGEN_PI 3.141592653589793238462643383279502884197169399375105820974944592307816406L +#define EIGEN_LOG2E 1.442695040888963407359924681001892137426645954152985934135449406931109219L +#define EIGEN_LN2 0.693147180559945309417232121458176568075500134360255254120680009493393621L + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +/** \internal \class global_math_functions_filtering_base + * + * What it does: + * Defines a typedef 'type' as follows: + * - if type T has a member typedef Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl, then + * global_math_functions_filtering_base::type is a typedef for it. + * - otherwise, global_math_functions_filtering_base::type is a typedef for T. + * + * How it's used: + * To allow to defined the global math functions (like sin...) in certain cases, like the Array expressions. + * When you do sin(array1+array2), the object array1+array2 has a complicated expression type, all what you want to know + * is that it inherits ArrayBase. So we implement a partial specialization of sin_impl for ArrayBase. + * So we must make sure to use sin_impl > and not sin_impl, otherwise our partial + * specialization won't be used. How does sin know that? That's exactly what global_math_functions_filtering_base tells + * it. + * + * How it's implemented: + * SFINAE in the style of enable_if. Highly susceptible of breaking compilers. With GCC, it sure does work, but if you + * replace the typename dummy by an integer template parameter, it doesn't work anymore! 
+ */ + +template +struct global_math_functions_filtering_base { + typedef T type; +}; + +template +struct always_void { + typedef void type; +}; + +template +struct global_math_functions_filtering_base< + T, typename always_void::type> { + typedef typename T::Eigen_BaseClassForSpecializationOfGlobalMathFuncImpl type; +}; + +#define EIGEN_MATHFUNC_IMPL(func, scalar) \ + Eigen::internal::func##_impl::type> +#define EIGEN_MATHFUNC_RETVAL(func, scalar) \ + typename Eigen::internal::func##_retval< \ + typename Eigen::internal::global_math_functions_filtering_base::type>::type + +/**************************************************************************** + * Implementation of real * + ****************************************************************************/ + +template ::IsComplex> +struct real_default_impl { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { return x; } +}; + +template +struct real_default_impl { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { + using std::real; + return real(x); + } +}; + +template +struct real_impl : real_default_impl {}; + +#if defined(EIGEN_GPU_COMPILE_PHASE) +template +struct real_impl> { + typedef T RealScalar; + EIGEN_DEVICE_FUNC static inline T run(const std::complex& x) { return x.real(); } +}; +#endif + +template +struct real_retval { + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** + * Implementation of imag * + ****************************************************************************/ + +template ::IsComplex> +struct imag_default_impl { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar&) { return RealScalar(0); } +}; + +template +struct imag_default_impl { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar 
run(const Scalar& x) { + using std::imag; + return imag(x); + } +}; + +template +struct imag_impl : imag_default_impl {}; + +#if defined(EIGEN_GPU_COMPILE_PHASE) +template +struct imag_impl> { + typedef T RealScalar; + EIGEN_DEVICE_FUNC static inline T run(const std::complex& x) { return x.imag(); } +}; +#endif + +template +struct imag_retval { + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** + * Implementation of real_ref * + ****************************************************************************/ + +template +struct real_ref_impl { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar& run(Scalar& x) { return reinterpret_cast(&x)[0]; } + EIGEN_DEVICE_FUNC static inline const RealScalar& run(const Scalar& x) { + return reinterpret_cast(&x)[0]; + } +}; + +template +struct real_ref_retval { + typedef typename NumTraits::Real& type; +}; + +/**************************************************************************** + * Implementation of imag_ref * + ****************************************************************************/ + +template +struct imag_ref_default_impl { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar& run(Scalar& x) { return reinterpret_cast(&x)[1]; } + EIGEN_DEVICE_FUNC static inline const RealScalar& run(const Scalar& x) { + return reinterpret_cast(&x)[1]; + } +}; + +template +struct imag_ref_default_impl { + EIGEN_DEVICE_FUNC constexpr static Scalar run(Scalar&) { return Scalar(0); } + EIGEN_DEVICE_FUNC constexpr static const Scalar run(const Scalar&) { return Scalar(0); } +}; + +template +struct imag_ref_impl : imag_ref_default_impl::IsComplex> {}; + +template +struct imag_ref_retval { + typedef typename NumTraits::Real& type; +}; + +} // namespace internal + +namespace numext { + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real, Scalar) real(const Scalar& x) { + return 
EIGEN_MATHFUNC_IMPL(real, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline internal::add_const_on_value_type_t real_ref( + const Scalar& x) { + return internal::real_ref_impl::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(real_ref, Scalar) real_ref(Scalar& x) { + return EIGEN_MATHFUNC_IMPL(real_ref, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag, Scalar) imag(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(imag, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar select(const Scalar& mask, const Scalar& a, const Scalar& b) { + return numext::is_exactly_zero(mask) ? b : a; +} + +} // namespace numext + +namespace internal { + +/**************************************************************************** + * Implementation of conj * + ****************************************************************************/ + +template ::IsComplex> +struct conj_default_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { return x; } +}; + +template +struct conj_default_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { + using std::conj; + return conj(x); + } +}; + +template ::IsComplex> +struct conj_impl : conj_default_impl {}; + +template +struct conj_retval { + typedef Scalar type; +}; + +/**************************************************************************** + * Implementation of abs2 * + ****************************************************************************/ + +template +struct abs2_impl_default { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { return x * x; } +}; + +template +struct abs2_impl_default // IsComplex +{ + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { + return numext::real(x) * numext::real(x) + numext::imag(x) * numext::imag(x); + } +}; + +template +struct abs2_impl { + typedef 
typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { + return abs2_impl_default::IsComplex>::run(x); + } +}; + +template +struct abs2_retval { + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** + * Implementation of sqrt/rsqrt * + ****************************************************************************/ + +template +struct sqrt_impl { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE Scalar run(const Scalar& x) { + EIGEN_USING_STD(sqrt); + return sqrt(x); + } +}; + +// Complex sqrt defined in MathFunctionsImpl.h. +template +EIGEN_DEVICE_FUNC ComplexT complex_sqrt(const ComplexT& a_x); + +// Custom implementation is faster than `std::sqrt`, works on +// GPU, and correctly handles special cases (unlike MSVC). +template +struct sqrt_impl> { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE std::complex run(const std::complex& x) { return complex_sqrt(x); } +}; + +template +struct sqrt_retval { + typedef Scalar type; +}; + +// Default implementation relies on numext::sqrt, at bottom of file. +template +struct rsqrt_impl; + +// Complex rsqrt defined in MathFunctionsImpl.h. 
+template +EIGEN_DEVICE_FUNC ComplexT complex_rsqrt(const ComplexT& a_x); + +template +struct rsqrt_impl> { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE std::complex run(const std::complex& x) { + return complex_rsqrt(x); + } +}; + +template +struct rsqrt_retval { + typedef Scalar type; +}; + +/**************************************************************************** + * Implementation of norm1 * + ****************************************************************************/ + +template +struct norm1_default_impl; + +template +struct norm1_default_impl { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { + EIGEN_USING_STD(abs); + return abs(numext::real(x)) + abs(numext::imag(x)); + } +}; + +template +struct norm1_default_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { + EIGEN_USING_STD(abs); + return abs(x); + } +}; + +template +struct norm1_impl : norm1_default_impl::IsComplex> {}; + +template +struct norm1_retval { + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** + * Implementation of hypot * + ****************************************************************************/ + +template +struct hypot_impl; + +template +struct hypot_retval { + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** + * Implementation of cast * + ****************************************************************************/ + +template +struct cast_impl { + EIGEN_DEVICE_FUNC static inline NewType run(const OldType& x) { return static_cast(x); } +}; + +template +struct cast_impl { + EIGEN_DEVICE_FUNC static inline bool run(const OldType& x) { return x != OldType(0); } +}; + +// Casting from S -> Complex leads to an implicit conversion from S to T, +// generating warnings on clang. Here we explicitly cast the real component. 
+template +struct cast_impl::IsComplex && NumTraits::IsComplex>> { + EIGEN_DEVICE_FUNC static inline NewType run(const OldType& x) { + typedef typename NumTraits::Real NewReal; + return static_cast(static_cast(x)); + } +}; + +// here, for once, we're plainly returning NewType: we don't want cast to do weird things. + +template +EIGEN_DEVICE_FUNC inline NewType cast(const OldType& x) { + return cast_impl::run(x); +} + +/**************************************************************************** + * Implementation of arg * + ****************************************************************************/ + +// Visual Studio 2017 has a bug where arg(float) returns 0 for negative inputs. +// This seems to be fixed in VS 2019. +#if (!EIGEN_COMP_MSVC || EIGEN_COMP_MSVC >= 1920) +// std::arg is only defined for types of std::complex, or integer types or float/double/long double +template ::IsComplex || is_integral::value || + is_same::value || is_same::value || + is_same::value> +struct arg_default_impl; + +template +struct arg_default_impl { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { + // There is no official ::arg on device in CUDA/HIP, so we always need to use std::arg. + using std::arg; + return static_cast(arg(x)); + } +}; + +// Must be non-complex floating-point type (e.g. half/bfloat16). +template +struct arg_default_impl { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { + return (x < Scalar(0)) ? RealScalar(EIGEN_PI) : RealScalar(0); + } +}; +#else +template ::IsComplex> +struct arg_default_impl { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { + return (x < RealScalar(0)) ? 
RealScalar(EIGEN_PI) : RealScalar(0); + } +}; + +template +struct arg_default_impl { + typedef typename NumTraits::Real RealScalar; + EIGEN_DEVICE_FUNC static inline RealScalar run(const Scalar& x) { + EIGEN_USING_STD(arg); + return arg(x); + } +}; +#endif +template +struct arg_impl : arg_default_impl {}; + +template +struct arg_retval { + typedef typename NumTraits::Real type; +}; + +/**************************************************************************** + * Implementation of expm1 * + ****************************************************************************/ + +// This implementation is based on GSL Math's expm1. +namespace std_fallback { +// fallback expm1 implementation in case there is no expm1(Scalar) function in namespace of Scalar, +// or that there is no suitable std::expm1 function available. Implementation +// attributed to Kahan. See: http://www.plunk.org/~hatch/rightway.php. +template +EIGEN_DEVICE_FUNC inline Scalar expm1(const Scalar& x) { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + typedef typename NumTraits::Real RealScalar; + + EIGEN_USING_STD(exp); + Scalar u = exp(x); + if (numext::equal_strict(u, Scalar(1))) { + return x; + } + Scalar um1 = u - RealScalar(1); + if (numext::equal_strict(um1, Scalar(-1))) { + return RealScalar(-1); + } + + EIGEN_USING_STD(log); + Scalar logu = log(u); + return numext::equal_strict(u, logu) ? u : (u - RealScalar(1)) * x / logu; +} +} // namespace std_fallback + +template +struct expm1_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + EIGEN_USING_STD(expm1); + return expm1(x); + } +}; + +template +struct expm1_retval { + typedef Scalar type; +}; + +/**************************************************************************** + * Implementation of log * + ****************************************************************************/ + +// Complex log defined in MathFunctionsImpl.h. 
+template +EIGEN_DEVICE_FUNC ComplexT complex_log(const ComplexT& z); + +template +struct log_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { + EIGEN_USING_STD(log); + return static_cast(log(x)); + } +}; + +template +struct log_impl> { + EIGEN_DEVICE_FUNC static inline std::complex run(const std::complex& z) { return complex_log(z); } +}; + +/**************************************************************************** + * Implementation of log1p * + ****************************************************************************/ + +namespace std_fallback { +// fallback log1p implementation in case there is no log1p(Scalar) function in namespace of Scalar, +// or that there is no suitable std::log1p function available +template +EIGEN_DEVICE_FUNC inline Scalar log1p(const Scalar& x) { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + typedef typename NumTraits::Real RealScalar; + EIGEN_USING_STD(log); + Scalar x1p = RealScalar(1) + x; + Scalar log_1p = log_impl::run(x1p); + const bool is_small = numext::equal_strict(x1p, Scalar(1)); + const bool is_inf = numext::equal_strict(x1p, log_1p); + return (is_small || is_inf) ? x : x * (log_1p / (x1p - RealScalar(1))); +} +} // namespace std_fallback + +template +struct log1p_impl { + EIGEN_STATIC_ASSERT_NON_INTEGER(Scalar) + + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& x) { + EIGEN_USING_STD(log1p); + return log1p(x); + } +}; + +// Specialization for complex types that are not supported by std::log1p. 
+template +struct log1p_impl> { + EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar) + + EIGEN_DEVICE_FUNC static inline std::complex run(const std::complex& x) { + return std_fallback::log1p(x); + } +}; + +template +struct log1p_retval { + typedef Scalar type; +}; + +/**************************************************************************** + * Implementation of pow * + ****************************************************************************/ + +template ::IsInteger && NumTraits::IsInteger> +struct pow_impl { + // typedef Scalar retval; + typedef typename ScalarBinaryOpTraits>::ReturnType + result_type; + static EIGEN_DEVICE_FUNC inline result_type run(const ScalarX& x, const ScalarY& y) { + EIGEN_USING_STD(pow); + return pow(x, y); + } +}; + +template +struct pow_impl { + typedef ScalarX result_type; + static EIGEN_DEVICE_FUNC inline ScalarX run(ScalarX x, ScalarY y) { + ScalarX res(1); + eigen_assert(!NumTraits::IsSigned || y >= 0); + if (y & 1) res *= x; + y >>= 1; + while (y) { + x *= x; + if (y & 1) res *= x; + y >>= 1; + } + return res; + } +}; + +enum { meta_floor_log2_terminate, meta_floor_log2_move_up, meta_floor_log2_move_down, meta_floor_log2_bogus }; + +template +struct meta_floor_log2_selector { + enum { + middle = (lower + upper) / 2, + value = (upper <= lower + 1) ? int(meta_floor_log2_terminate) + : (n < (1 << middle)) ? int(meta_floor_log2_move_down) + : (n == 0) ? int(meta_floor_log2_bogus) + : int(meta_floor_log2_move_up) + }; +}; + +template ::value> +struct meta_floor_log2 {}; + +template +struct meta_floor_log2 { + enum { value = meta_floor_log2::middle>::value }; +}; + +template +struct meta_floor_log2 { + enum { value = meta_floor_log2::middle, upper>::value }; +}; + +template +struct meta_floor_log2 { + enum { value = (n >= ((unsigned int)(1) << (lower + 1))) ? 
lower + 1 : lower }; +}; + +template +struct meta_floor_log2 { + // no value, error at compile time +}; + +template +struct count_bits_impl { + static_assert(std::is_integral::value && std::is_unsigned::value, + "BitsType must be an unsigned integer"); + static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { + int n = CHAR_BIT * sizeof(BitsType); + int shift = n / 2; + while (bits > 0 && shift > 0) { + BitsType y = bits >> shift; + if (y > 0) { + n -= shift; + bits = y; + } + shift /= 2; + } + if (shift == 0) { + --n; + } + return n; + } + + static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { + int n = CHAR_BIT * sizeof(BitsType); + int shift = n / 2; + while (bits > 0 && shift > 0) { + BitsType y = bits << shift; + if (y > 0) { + n -= shift; + bits = y; + } + shift /= 2; + } + if (shift == 0) { + --n; + } + return n; + } +}; + +// Count leading zeros. +template +EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { + return count_bits_impl::clz(bits); +} + +// Count trailing zeros. +template +EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { + return count_bits_impl::ctz(bits); +} + +#if EIGEN_COMP_GNUC || EIGEN_COMP_CLANG + +template +struct count_bits_impl< + BitsType, std::enable_if_t::value && sizeof(BitsType) <= sizeof(unsigned int)>> { + static constexpr int kNumBits = static_cast(sizeof(BitsType) * CHAR_BIT); + static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { + static constexpr int kLeadingBitsOffset = (sizeof(unsigned int) - sizeof(BitsType)) * CHAR_BIT; + return bits == 0 ? kNumBits : __builtin_clz(static_cast(bits)) - kLeadingBitsOffset; + } + + static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { + return bits == 0 ? 
kNumBits : __builtin_ctz(static_cast(bits)); + } +}; + +template +struct count_bits_impl::value && sizeof(unsigned int) < sizeof(BitsType) && + sizeof(BitsType) <= sizeof(unsigned long)>> { + static constexpr int kNumBits = static_cast(sizeof(BitsType) * CHAR_BIT); + static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { + static constexpr int kLeadingBitsOffset = (sizeof(unsigned long) - sizeof(BitsType)) * CHAR_BIT; + return bits == 0 ? kNumBits : __builtin_clzl(static_cast(bits)) - kLeadingBitsOffset; + } + + static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { + return bits == 0 ? kNumBits : __builtin_ctzl(static_cast(bits)); + } +}; + +template +struct count_bits_impl::value && sizeof(unsigned long) < sizeof(BitsType) && + sizeof(BitsType) <= sizeof(unsigned long long)>> { + static constexpr int kNumBits = static_cast(sizeof(BitsType) * CHAR_BIT); + static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { + static constexpr int kLeadingBitsOffset = (sizeof(unsigned long long) - sizeof(BitsType)) * CHAR_BIT; + return bits == 0 ? kNumBits : __builtin_clzll(static_cast(bits)) - kLeadingBitsOffset; + } + + static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { + return bits == 0 ? kNumBits : __builtin_ctzll(static_cast(bits)); + } +}; + +#elif EIGEN_COMP_MSVC + +template +struct count_bits_impl< + BitsType, std::enable_if_t::value && sizeof(BitsType) <= sizeof(unsigned long)>> { + static constexpr int kNumBits = static_cast(sizeof(BitsType) * CHAR_BIT); + static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { + unsigned long out; + _BitScanReverse(&out, static_cast(bits)); + return bits == 0 ? kNumBits : (kNumBits - 1) - static_cast(out); + } + + static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { + unsigned long out; + _BitScanForward(&out, static_cast(bits)); + return bits == 0 ? 
kNumBits : static_cast(out); + } +}; + +#ifdef _WIN64 + +template +struct count_bits_impl::value && sizeof(unsigned long) < sizeof(BitsType) && + sizeof(BitsType) <= sizeof(__int64)>> { + static constexpr int kNumBits = static_cast(sizeof(BitsType) * CHAR_BIT); + static EIGEN_DEVICE_FUNC inline int clz(BitsType bits) { + unsigned long out; + _BitScanReverse64(&out, static_cast(bits)); + return bits == 0 ? kNumBits : (kNumBits - 1) - static_cast(out); + } + + static EIGEN_DEVICE_FUNC inline int ctz(BitsType bits) { + unsigned long out; + _BitScanForward64(&out, static_cast(bits)); + return bits == 0 ? kNumBits : static_cast(out); + } +}; + +#endif // _WIN64 + +#endif // EIGEN_COMP_GNUC || EIGEN_COMP_CLANG + +template +struct log_2_impl { + static constexpr int kTotalBits = sizeof(BitsType) * CHAR_BIT; + static EIGEN_DEVICE_FUNC inline int run_ceil(const BitsType& x) { + const int n = kTotalBits - clz(x); + bool power_of_two = (x & (x - 1)) == 0; + return x == 0 ? 0 : power_of_two ? (n - 1) : n; + } + static EIGEN_DEVICE_FUNC inline int run_floor(const BitsType& x) { + const int n = kTotalBits - clz(x); + return x == 0 ? 
0 : n - 1; + } +}; + +template +int log2_ceil(const BitsType& x) { + return log_2_impl::run_ceil(x); +} + +template +int log2_floor(const BitsType& x) { + return log_2_impl::run_floor(x); +} + +// Implementation of is* functions + +template +EIGEN_DEVICE_FUNC std::enable_if_t::has_infinity || std::numeric_limits::has_quiet_NaN || + std::numeric_limits::has_signaling_NaN), + bool> +isfinite_impl(const T&) { + return true; +} + +template +EIGEN_DEVICE_FUNC std::enable_if_t<(std::numeric_limits::has_infinity || std::numeric_limits::has_quiet_NaN || + std::numeric_limits::has_signaling_NaN) && + (!NumTraits::IsComplex), + bool> +isfinite_impl(const T& x) { + EIGEN_USING_STD(isfinite); + return isfinite EIGEN_NOT_A_MACRO(x); +} + +template +EIGEN_DEVICE_FUNC std::enable_if_t::has_infinity, bool> isinf_impl(const T&) { + return false; +} + +template +EIGEN_DEVICE_FUNC std::enable_if_t<(std::numeric_limits::has_infinity && !NumTraits::IsComplex), bool> isinf_impl( + const T& x) { + EIGEN_USING_STD(isinf); + return isinf EIGEN_NOT_A_MACRO(x); +} + +template +EIGEN_DEVICE_FUNC +std::enable_if_t::has_quiet_NaN || std::numeric_limits::has_signaling_NaN), bool> +isnan_impl(const T&) { + return false; +} + +template +EIGEN_DEVICE_FUNC std::enable_if_t< + (std::numeric_limits::has_quiet_NaN || std::numeric_limits::has_signaling_NaN) && (!NumTraits::IsComplex), + bool> +isnan_impl(const T& x) { + EIGEN_USING_STD(isnan); + return isnan EIGEN_NOT_A_MACRO(x); +} + +// The following overload are defined at the end of this file +template +EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex& x); +template +EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex& x); +template +EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex& x); +template +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS T ptanh_float(const T& a_x); + +/**************************************************************************** + * Implementation of sign * + 
****************************************************************************/ +template ::IsComplex != 0), + bool IsInteger = (NumTraits::IsInteger != 0)> +struct sign_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& a) { return Scalar((a > Scalar(0)) - (a < Scalar(0))); } +}; + +template +struct sign_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& a) { + return (isnan_impl)(a) ? a : Scalar((a > Scalar(0)) - (a < Scalar(0))); + } +}; + +template +struct sign_impl { + EIGEN_DEVICE_FUNC static inline Scalar run(const Scalar& a) { + using real_type = typename NumTraits::Real; + EIGEN_USING_STD(abs); + real_type aa = abs(a); + if (aa == real_type(0)) return Scalar(0); + aa = real_type(1) / aa; + return Scalar(numext::real(a) * aa, numext::imag(a) * aa); + } +}; + +// The sign function for bool is the identity. +template <> +struct sign_impl { + EIGEN_DEVICE_FUNC static inline bool run(const bool& a) { return a; } +}; + +template +struct sign_retval { + typedef Scalar type; +}; + +// suppress "unary minus operator applied to unsigned type, result still unsigned" warnings on MSVC +// note: `0 - a` is distinct from `-a` when Scalar is a floating point type and `a` is zero + +template ::IsInteger> +struct negate_impl { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar run(const Scalar& a) { return -a; } +}; + +template +struct negate_impl { + EIGEN_STATIC_ASSERT((!is_same::value), NEGATE IS NOT DEFINED FOR BOOLEAN TYPES) + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE Scalar run(const Scalar& a) { return Scalar(0) - a; } +}; + +template +struct negate_retval { + typedef Scalar type; +}; + +template ::type>::IsInteger> +struct nearest_integer_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_floor(const Scalar& x) { + EIGEN_USING_STD(floor) return floor(x); + } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_ceil(const Scalar& x) { + EIGEN_USING_STD(ceil) return ceil(x); + } + static EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE Scalar run_rint(const Scalar& x) { + EIGEN_USING_STD(rint) return rint(x); + } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_round(const Scalar& x) { + EIGEN_USING_STD(round) return round(x); + } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_trunc(const Scalar& x) { + EIGEN_USING_STD(trunc) return trunc(x); + } +}; +template +struct nearest_integer_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_floor(const Scalar& x) { return x; } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_ceil(const Scalar& x) { return x; } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_rint(const Scalar& x) { return x; } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_round(const Scalar& x) { return x; } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run_trunc(const Scalar& x) { return x; } +}; + +// Extra namespace to prevent leaking std::fma into Eigen::internal. +namespace has_fma_detail { + +template +struct has_fma_impl : public std::false_type {}; + +using std::fma; + +template +struct has_fma_impl< + T, std::enable_if_t(), std::declval(), std::declval()))>::value>> + : public std::true_type {}; + +} // namespace has_fma_detail + +template +struct has_fma : public has_fma_detail::has_fma_impl {}; + +// Default implementation. +template +struct fma_impl { + static_assert(has_fma::value, "No function fma(...) for type. Please provide an implementation."); +}; + +// STD or ADL version if it exists. 
+template +struct fma_impl::value>> { + static EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T run(const T& a, const T& b, const T& c) { + using std::fma; + return fma(a, b, c); + } +}; + +#if defined(EIGEN_GPUCC) +template <> +struct has_fma : public true_type {}; + +template <> +struct fma_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float run(const float& a, const float& b, const float& c) { + return ::fmaf(a, b, c); + } +}; + +template <> +struct has_fma : public true_type {}; + +template <> +struct fma_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE double run(const double& a, const double& b, const double& c) { + return ::fma(a, b, c); + } +}; +#endif + +// Basic multiply-add. +template +struct madd_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar& x, const Scalar& y, const Scalar& z) { + return x * y + z; + } +}; + +#if EIGEN_SCALAR_MADD_USE_FMA +template +struct madd_impl::value>> { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar run(const Scalar& x, const Scalar& y, const Scalar& z) { + return fma_impl::run(x, y, z); + } +}; +#endif + +} // end namespace internal + +/**************************************************************************** + * Generic math functions * + ****************************************************************************/ + +namespace numext { + +#if (!defined(EIGEN_GPUCC) || defined(EIGEN_CONSTEXPR_ARE_DEVICE_FUNC)) +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) { + EIGEN_USING_STD(min) + return min EIGEN_NOT_A_MACRO(x, y); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) { + EIGEN_USING_STD(max) + return max EIGEN_NOT_A_MACRO(x, y); +} +#else +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T mini(const T& x, const T& y) { + return y < x ? 
y : x; +} +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float mini(const float& x, const float& y) { + return fminf(x, y); +} +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double mini(const double& x, const double& y) { + return fmin(x, y); +} + +#ifndef EIGEN_GPU_COMPILE_PHASE +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE long double mini(const long double& x, const long double& y) { +#if defined(EIGEN_HIPCC) + // no "fminl" on HIP yet + return (x < y) ? x : y; +#else + return fminl(x, y); +#endif +} +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T maxi(const T& x, const T& y) { + return x < y ? y : x; +} +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float maxi(const float& x, const float& y) { + return fmaxf(x, y); +} +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double maxi(const double& x, const double& y) { + return fmax(x, y); +} +#ifndef EIGEN_GPU_COMPILE_PHASE +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE long double maxi(const long double& x, const long double& y) { +#if defined(EIGEN_HIPCC) + // no "fmaxl" on HIP yet + return (x > y) ? 
x : y; +#else + return fmaxl(x, y); +#endif +} +#endif +#endif + +#if defined(SYCL_DEVICE_ONLY) + +#define SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_char) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_short) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_int) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_long) +#define SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_UNARY(NAME, FUNC) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_char) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_short) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_int) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_long) +#define SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_uchar) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_ushort) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_uint) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_ulong) +#define SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY(NAME, FUNC) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_uchar) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_ushort) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_uint) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_ulong) +#define SYCL_SPECIALIZE_INTEGER_TYPES_BINARY(NAME, FUNC) \ + SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) \ + SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_BINARY(NAME, FUNC) +#define SYCL_SPECIALIZE_INTEGER_TYPES_UNARY(NAME, FUNC) \ + SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_UNARY(NAME, FUNC) \ + SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY(NAME, FUNC) +#define SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(NAME, FUNC) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_float) \ + SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, cl::sycl::cl_double) +#define SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(NAME, FUNC) \ + 
SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_float) \ + SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, cl::sycl::cl_double) +#define SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(NAME, FUNC, RET_TYPE) \ + SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, cl::sycl::cl_float) \ + SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, cl::sycl::cl_double) + +#define SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE RET_TYPE NAME(const ARG_TYPE& x) { \ + return cl::sycl::FUNC(x); \ + } + +#define SYCL_SPECIALIZE_UNARY_FUNC(NAME, FUNC, TYPE) SYCL_SPECIALIZE_GEN_UNARY_FUNC(NAME, FUNC, TYPE, TYPE) + +#define SYCL_SPECIALIZE_GEN1_BINARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE1, ARG_TYPE2) \ + template <> \ + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE RET_TYPE NAME(const ARG_TYPE1& x, const ARG_TYPE2& y) { \ + return cl::sycl::FUNC(x, y); \ + } + +#define SYCL_SPECIALIZE_GEN2_BINARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE) \ + SYCL_SPECIALIZE_GEN1_BINARY_FUNC(NAME, FUNC, RET_TYPE, ARG_TYPE, ARG_TYPE) + +#define SYCL_SPECIALIZE_BINARY_FUNC(NAME, FUNC, TYPE) SYCL_SPECIALIZE_GEN2_BINARY_FUNC(NAME, FUNC, TYPE, TYPE) + +SYCL_SPECIALIZE_INTEGER_TYPES_BINARY(mini, min) +SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(mini, fmin) +SYCL_SPECIALIZE_INTEGER_TYPES_BINARY(maxi, max) +SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(maxi, fmax) + +#endif + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(arg, Scalar) arg(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(arg, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline internal::add_const_on_value_type_t imag_ref( + const Scalar& x) { + return internal::imag_ref_impl::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(imag_ref, Scalar) imag_ref(Scalar& x) { + return EIGEN_MATHFUNC_IMPL(imag_ref, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(conj, Scalar) conj(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(conj, 
Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(sign, Scalar) sign(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(sign, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(negate, Scalar) negate(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(negate, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(abs2, Scalar) abs2(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(abs2, Scalar)::run(x); +} + +EIGEN_DEVICE_FUNC inline bool abs2(bool x) { return x; } + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T absdiff(const T& x, const T& y) { + return x > y ? x - y : y - x; +} +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float absdiff(const float& x, const float& y) { + return fabsf(x - y); +} +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double absdiff(const double& x, const double& y) { + return fabs(x - y); +} + +// HIP and CUDA do not support long double. +#ifndef EIGEN_GPU_COMPILE_PHASE +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE long double absdiff(const long double& x, const long double& y) { + return fabsl(x - y); +} +#endif + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(norm1, Scalar) norm1(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(norm1, Scalar)::run(x); +} + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(hypot, Scalar) hypot(const Scalar& x, const Scalar& y) { + return EIGEN_MATHFUNC_IMPL(hypot, Scalar)::run(x, y); +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(hypot, hypot) +#endif + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(log1p, Scalar) log1p(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(log1p, Scalar)::run(x); +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(log1p, log1p) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float log1p(const float& x) { + return ::log1pf(x); +} + +template <> +EIGEN_DEVICE_FUNC 
EIGEN_ALWAYS_INLINE double log1p(const double& x) { + return ::log1p(x); +} +#endif + +template +EIGEN_DEVICE_FUNC inline typename internal::pow_impl::result_type pow(const ScalarX& x, + const ScalarY& y) { + return internal::pow_impl::run(x, y); +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(pow, pow) +#endif + +template +EIGEN_DEVICE_FUNC bool(isnan)(const T& x) { + return internal::isnan_impl(x); +} +template +EIGEN_DEVICE_FUNC bool(isinf)(const T& x) { + return internal::isinf_impl(x); +} +template +EIGEN_DEVICE_FUNC bool(isfinite)(const T& x) { + return internal::isfinite_impl(x); +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isnan, isnan, bool) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isinf, isinf, bool) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE(isfinite, isfinite, bool) +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar rint(const Scalar& x) { + return internal::nearest_integer_impl::run_rint(x); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar round(const Scalar& x) { + return internal::nearest_integer_impl::run_round(x); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar(floor)(const Scalar& x) { + return internal::nearest_integer_impl::run_floor(x); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar(ceil)(const Scalar& x) { + return internal::nearest_integer_impl::run_ceil(x); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar(trunc)(const Scalar& x) { + return internal::nearest_integer_impl::run_trunc(x); +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(round, round) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(floor, floor) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(ceil, ceil) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(trunc, trunc) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float floor(const float& x) { + return ::floorf(x); +} +template <> 
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double floor(const double& x) { + return ::floor(x); +} +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float ceil(const float& x) { + return ::ceilf(x); +} +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double ceil(const double& x) { + return ::ceil(x); +} +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float trunc(const float& x) { + return ::truncf(x); +} +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double trunc(const double& x) { + return ::trunc(x); +} +#endif + +// Integer division with rounding up. +// T is assumed to be an integer type with a>=0, and b>0 +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr T div_ceil(T a, T b) { + using UnsignedT = typename internal::make_unsigned::type; + EIGEN_STATIC_ASSERT((NumTraits::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES) + // Note: explicitly declaring a and b as non-negative values allows the compiler to use better optimizations + const UnsignedT ua = UnsignedT(a); + const UnsignedT ub = UnsignedT(b); + // Note: This form is used because it cannot overflow. + return ua == 0 ? 0 : (ua - 1) / ub + 1; +} + +// Integer round down to nearest power of b +// T is assumed to be an integer type with a>=0, and b>0 +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE constexpr T round_down(T a, U b) { + using UnsignedT = typename internal::make_unsigned::type; + using UnsignedU = typename internal::make_unsigned::type; + EIGEN_STATIC_ASSERT((NumTraits::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES) + EIGEN_STATIC_ASSERT((NumTraits::IsInteger), THIS FUNCTION IS FOR INTEGER TYPES) + // Note: explicitly declaring a and b as non-negative values allows the compiler to use better optimizations + const UnsignedT ua = UnsignedT(a); + const UnsignedU ub = UnsignedU(b); + return ub * (ua / ub); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T log2(T x) { + EIGEN_USING_STD(log2); + return log2(x); +} + +/** Log base 2 for 32 bits positive integers. 
+ * Conveniently returns 0 for x==0. */ +constexpr int log2(int x) { + unsigned int v(x); + constexpr int table[32] = {0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, + 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31}; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return table[(v * 0x07C4ACDDU) >> 27]; +} + +/** \returns the square root of \a x. + * + * It is essentially equivalent to + * \code using std::sqrt; return sqrt(x); \endcode + * but slightly faster for float/double and some compilers (e.g., gcc), thanks to + * specializations when SSE is enabled. + * + * It's usage is justified in performance critical functions, like norm/normalize. + */ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE EIGEN_MATHFUNC_RETVAL(sqrt, Scalar) sqrt(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(sqrt, Scalar)::run(x); +} + +// Boolean specialization, avoids implicit float to bool conversion (-Wimplicit-conversion-floating-point-to-bool). +template <> +EIGEN_DEFINE_FUNCTION_ALLOWING_MULTIPLE_DEFINITIONS EIGEN_DEVICE_FUNC bool sqrt(const bool& x) { + return x; +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sqrt, sqrt) +#endif + +/** \returns the cube root of \a x. **/ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::enable_if_t::IsComplex, T> cbrt(const T& x) { + EIGEN_USING_STD(cbrt); + return static_cast(cbrt(x)); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::enable_if_t::IsComplex, T> cbrt(const T& x) { + EIGEN_USING_STD(pow); + return pow(x, typename NumTraits::Real(1.0 / 3.0)); +} + +/** \returns the reciprocal square root of \a x. 
**/ +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T rsqrt(const T& x) { + return internal::rsqrt_impl::run(x); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T log(const T& x) { + return internal::log_impl::run(x); +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(log, log) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float log(const float& x) { + return ::logf(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double log(const double& x) { + return ::log(x); +} +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE + std::enable_if_t::IsSigned || NumTraits::IsComplex, typename NumTraits::Real> + abs(const T& x) { + EIGEN_USING_STD(abs); + return abs(x); +} + +template +EIGEN_DEVICE_FUNC +EIGEN_ALWAYS_INLINE std::enable_if_t::IsSigned || NumTraits::IsComplex), typename NumTraits::Real> +abs(const T& x) { + return x; +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_INTEGER_TYPES_UNARY(abs, abs) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(abs, fabs) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float abs(const float& x) { + return ::fabsf(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double abs(const double& x) { + return ::fabs(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float abs(const std::complex& x) { + return ::hypotf(x.real(), x.imag()); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double abs(const std::complex& x) { + return ::hypot(x.real(), x.imag()); +} +#endif + +template ::IsInteger, bool IsSigned = NumTraits::IsSigned> +struct signbit_impl; +template +struct signbit_impl { + static constexpr size_t Size = sizeof(Scalar); + static constexpr size_t Shift = (CHAR_BIT * Size) - 1; + using intSize_t = typename get_integer_by_size::signed_type; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static Scalar run(const Scalar& x) { + intSize_t a = bit_cast(x); + a = a >> Shift; + Scalar result = 
bit_cast(a); + return result; + } +}; +template +struct signbit_impl { + static constexpr size_t Size = sizeof(Scalar); + static constexpr size_t Shift = (CHAR_BIT * Size) - 1; + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Scalar run(const Scalar& x) { return x >> Shift; } +}; +template +struct signbit_impl { + EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Scalar run(const Scalar&) { return Scalar(0); } +}; +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE static constexpr Scalar signbit(const Scalar& x) { + return signbit_impl::run(x); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T exp(const T& x) { + EIGEN_USING_STD(exp); + return exp(x); +} + +// MSVC screws up some edge-cases for std::exp(complex). +#ifdef EIGEN_COMP_MSVC +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp(const std::complex& x) { + EIGEN_USING_STD(exp); + // If z is (x,±∞) (for any finite x), the result is (NaN,NaN) and FE_INVALID is raised. + // If z is (x,NaN) (for any finite x), the result is (NaN,NaN) and FE_INVALID may be raised. 
+ if ((isfinite)(real_ref(x)) && !(isfinite)(imag_ref(x))) { + return std::complex(NumTraits::quiet_NaN(), NumTraits::quiet_NaN()); + } + // If z is (+∞,±∞), the result is (±∞,NaN) and FE_INVALID is raised (the sign of the real part is unspecified) + // If z is (+∞,NaN), the result is (±∞,NaN) (the sign of the real part is unspecified) + if ((real_ref(x) == NumTraits::infinity() && !(isfinite)(imag_ref(x)))) { + return std::complex(NumTraits::infinity(), NumTraits::quiet_NaN()); + } + return exp(x); +} +#endif + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(exp, exp) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float exp(const float& x) { + return ::expf(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double exp(const double& x) { + return ::exp(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp(const std::complex& x) { + float com = ::expf(x.real()); + float res_real = com * ::cosf(x.imag()); + float res_imag = com * ::sinf(x.imag()); + return std::complex(res_real, res_imag); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp(const std::complex& x) { + double com = ::exp(x.real()); + double res_real = com * ::cos(x.imag()); + double res_imag = com * ::sin(x.imag()); + return std::complex(res_real, res_imag); +} +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T exp2(const T& x) { + EIGEN_USING_STD(exp2); + return exp2(x); +} + +// MSVC screws up some edge-cases for std::exp2(complex). +#ifdef EIGEN_COMP_MSVC +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp2(const std::complex& x) { + EIGEN_USING_STD(exp); + // If z is (x,±∞) (for any finite x), the result is (NaN,NaN) and FE_INVALID is raised. + // If z is (x,NaN) (for any finite x), the result is (NaN,NaN) and FE_INVALID may be raised. 
+ if ((isfinite)(real_ref(x)) && !(isfinite)(imag_ref(x))) { + return std::complex(NumTraits::quiet_NaN(), NumTraits::quiet_NaN()); + } + // If z is (+∞,±∞), the result is (±∞,NaN) and FE_INVALID is raised (the sign of the real part is unspecified) + // If z is (+∞,NaN), the result is (±∞,NaN) (the sign of the real part is unspecified) + if ((real_ref(x) == NumTraits::infinity() && !(isfinite)(imag_ref(x)))) { + return std::complex(NumTraits::infinity(), NumTraits::quiet_NaN()); + } + return exp2(x); +} +#endif + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(exp2, exp2) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float exp2(const float& x) { + return ::exp2f(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double exp2(const double& x) { + return ::exp2(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp2(const std::complex& x) { + float com = ::exp2f(x.real()); + float res_real = com * ::cosf(static_cast(EIGEN_LN2) * x.imag()); + float res_imag = com * ::sinf(static_cast(EIGEN_LN2) * x.imag()); + return std::complex(res_real, res_imag); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE std::complex exp2(const std::complex& x) { + double com = ::exp2(x.real()); + double res_real = com * ::cos(static_cast(EIGEN_LN2) * x.imag()); + double res_imag = com * ::sin(static_cast(EIGEN_LN2) * x.imag()); + return std::complex(res_real, res_imag); +} +#endif + +template +EIGEN_DEVICE_FUNC inline EIGEN_MATHFUNC_RETVAL(expm1, Scalar) expm1(const Scalar& x) { + return EIGEN_MATHFUNC_IMPL(expm1, Scalar)::run(x); +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(expm1, expm1) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float expm1(const float& x) { + return ::expm1f(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double expm1(const double& x) { + return ::expm1(x); +} +#endif + +template 
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T cos(const T& x) { + EIGEN_USING_STD(cos); + return cos(x); +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(cos, cos) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float cos(const float& x) { + return ::cosf(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double cos(const double& x) { + return ::cos(x); +} +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T sin(const T& x) { + EIGEN_USING_STD(sin); + return sin(x); +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sin, sin) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float sin(const float& x) { + return ::sinf(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double sin(const double& x) { + return ::sin(x); +} +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T tan(const T& x) { + EIGEN_USING_STD(tan); + return tan(x); +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(tan, tan) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tan(const float& x) { + return ::tanf(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double tan(const double& x) { + return ::tan(x); +} +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T acos(const T& x) { + EIGEN_USING_STD(acos); + return acos(x); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T acosh(const T& x) { + EIGEN_USING_STD(acosh); + return static_cast(acosh(x)); +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acos, acos) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(acosh, acosh) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float acos(const float& x) { + return ::acosf(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double acos(const double& x) { + return ::acos(x); +} +#endif + +template 
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T asin(const T& x) { + EIGEN_USING_STD(asin); + return asin(x); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T asinh(const T& x) { + EIGEN_USING_STD(asinh); + return static_cast(asinh(x)); +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asin, asin) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(asinh, asinh) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float asin(const float& x) { + return ::asinf(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double asin(const double& x) { + return ::asin(x); +} +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T atan(const T& x) { + EIGEN_USING_STD(atan); + return static_cast(atan(x)); +} + +template ::IsComplex, int> = 0> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T atan2(const T& y, const T& x) { + EIGEN_USING_STD(atan2); + return static_cast(atan2(y, x)); +} + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T atanh(const T& x) { + EIGEN_USING_STD(atanh); + return static_cast(atanh(x)); +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atan, atan) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(atanh, atanh) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float atan(const float& x) { + return ::atanf(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double atan(const double& x) { + return ::atan(x); +} +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T cosh(const T& x) { + EIGEN_USING_STD(cosh); + return static_cast(cosh(x)); +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(cosh, cosh) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float cosh(const float& x) { + return ::coshf(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double cosh(const double& x) { + return ::cosh(x); +} +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T sinh(const T& 
x) { + EIGEN_USING_STD(sinh); + return static_cast(sinh(x)); +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(sinh, sinh) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float sinh(const float& x) { + return ::sinhf(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double sinh(const double& x) { + return ::sinh(x); +} +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T tanh(const T& x) { + EIGEN_USING_STD(tanh); + return tanh(x); +} + +#if (!defined(EIGEN_GPUCC)) && EIGEN_FAST_MATH && !defined(SYCL_DEVICE_ONLY) +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(float x) { return internal::ptanh_float(x); } +#endif + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_UNARY(tanh, tanh) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float tanh(const float& x) { + return ::tanhf(x); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double tanh(const double& x) { + return ::tanh(x); +} +#endif + +template +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T fmod(const T& a, const T& b) { + EIGEN_USING_STD(fmod); + return fmod(a, b); +} + +#if defined(SYCL_DEVICE_ONLY) +SYCL_SPECIALIZE_FLOATING_TYPES_BINARY(fmod, fmod) +#endif + +#if defined(EIGEN_GPUCC) +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float fmod(const float& a, const float& b) { + return ::fmodf(a, b); +} + +template <> +EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE double fmod(const double& a, const double& b) { + return ::fmod(a, b); +} +#endif + +#if defined(SYCL_DEVICE_ONLY) +#undef SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_BINARY +#undef SYCL_SPECIALIZE_SIGNED_INTEGER_TYPES_UNARY +#undef SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_BINARY +#undef SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY +#undef SYCL_SPECIALIZE_INTEGER_TYPES_BINARY +#undef SYCL_SPECIALIZE_UNSIGNED_INTEGER_TYPES_UNARY +#undef SYCL_SPECIALIZE_FLOATING_TYPES_BINARY +#undef SYCL_SPECIALIZE_FLOATING_TYPES_UNARY +#undef 
SYCL_SPECIALIZE_FLOATING_TYPES_UNARY_FUNC_RET_TYPE +#undef SYCL_SPECIALIZE_GEN_UNARY_FUNC +#undef SYCL_SPECIALIZE_UNARY_FUNC +#undef SYCL_SPECIALIZE_GEN1_BINARY_FUNC +#undef SYCL_SPECIALIZE_GEN2_BINARY_FUNC +#undef SYCL_SPECIALIZE_BINARY_FUNC +#endif + +template ::value>> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar logical_shift_left(const Scalar& a, int n) { + return a << n; +} + +template ::value>> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar logical_shift_right(const Scalar& a, int n) { + using UnsignedScalar = typename numext::get_integer_by_size::unsigned_type; + return bit_cast(bit_cast(a) >> n); +} + +template ::value>> +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar arithmetic_shift_right(const Scalar& a, int n) { + using SignedScalar = typename numext::get_integer_by_size::signed_type; + return bit_cast(bit_cast(a) >> n); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar fma(const Scalar& x, const Scalar& y, const Scalar& z) { + return internal::fma_impl::run(x, y, z); +} + +// Multiply-add. 
+template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar madd(const Scalar& x, const Scalar& y, const Scalar& z) { + return internal::madd_impl::run(x, y, z); +} + +} // end namespace numext + +namespace internal { + +template +EIGEN_DEVICE_FUNC bool isfinite_impl(const std::complex& x) { + return (numext::isfinite)(numext::real(x)) && (numext::isfinite)(numext::imag(x)); +} + +template +EIGEN_DEVICE_FUNC bool isnan_impl(const std::complex& x) { + return (numext::isnan)(numext::real(x)) || (numext::isnan)(numext::imag(x)); +} + +template +EIGEN_DEVICE_FUNC bool isinf_impl(const std::complex& x) { + return ((numext::isinf)(numext::real(x)) || (numext::isinf)(numext::imag(x))) && (!(numext::isnan)(x)); +} + +/**************************************************************************** + * Implementation of fuzzy comparisons * + ****************************************************************************/ + +template +struct scalar_fuzzy_default_impl {}; + +template +struct scalar_fuzzy_default_impl { + typedef typename NumTraits::Real RealScalar; + template + EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, + const RealScalar& prec) { + return numext::abs(x) <= numext::abs(y) * prec; + } + EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) { + return numext::abs(x - y) <= numext::mini(numext::abs(x), numext::abs(y)) * prec; + } + EIGEN_DEVICE_FUNC static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar& prec) { + return x <= y || isApprox(x, y, prec); + } +}; + +template +struct scalar_fuzzy_default_impl { + typedef typename NumTraits::Real RealScalar; + template + EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const Scalar& x, const Scalar&, const RealScalar&) { + return x == Scalar(0); + } + EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar&) { return x == y; } + EIGEN_DEVICE_FUNC 
static inline bool isApproxOrLessThan(const Scalar& x, const Scalar& y, const RealScalar&) { + return x <= y; + } +}; + +template +struct scalar_fuzzy_default_impl { + typedef typename NumTraits::Real RealScalar; + template + EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const Scalar& x, const OtherScalar& y, + const RealScalar& prec) { + return numext::abs2(x) <= numext::abs2(y) * prec * prec; + } + EIGEN_DEVICE_FUNC static inline bool isApprox(const Scalar& x, const Scalar& y, const RealScalar& prec) { + return numext::abs2(x - y) <= numext::mini(numext::abs2(x), numext::abs2(y)) * prec * prec; + } +}; + +template +struct scalar_fuzzy_impl + : scalar_fuzzy_default_impl::IsComplex, NumTraits::IsInteger> {}; + +template +EIGEN_DEVICE_FUNC inline bool isMuchSmallerThan( + const Scalar& x, const OtherScalar& y, + const typename NumTraits::Real& precision = NumTraits::dummy_precision()) { + return scalar_fuzzy_impl::template isMuchSmallerThan(x, y, precision); +} + +template +EIGEN_DEVICE_FUNC inline bool isApprox( + const Scalar& x, const Scalar& y, + const typename NumTraits::Real& precision = NumTraits::dummy_precision()) { + return scalar_fuzzy_impl::isApprox(x, y, precision); +} + +template +EIGEN_DEVICE_FUNC inline bool isApproxOrLessThan( + const Scalar& x, const Scalar& y, + const typename NumTraits::Real& precision = NumTraits::dummy_precision()) { + return scalar_fuzzy_impl::isApproxOrLessThan(x, y, precision); +} + +/****************************************** +*** The special case of the bool type *** +******************************************/ + +template <> +struct scalar_fuzzy_impl { + typedef bool RealScalar; + + template + EIGEN_DEVICE_FUNC static inline bool isMuchSmallerThan(const bool& x, const bool&, const bool&) { + return !x; + } + + EIGEN_DEVICE_FUNC static inline bool isApprox(bool x, bool y, bool) { return x == y; } + + EIGEN_DEVICE_FUNC static inline bool isApproxOrLessThan(const bool& x, const bool& y, const bool&) { + return (!x) 
|| y; + } +}; + +} // end namespace internal + +// Default implementations that rely on other numext implementations +namespace internal { + +// Specialization for complex types that are not supported by std::expm1. +template +struct expm1_impl> { + EIGEN_STATIC_ASSERT_NON_INTEGER(RealScalar) + + EIGEN_DEVICE_FUNC static inline std::complex run(const std::complex& x) { + RealScalar xr = x.real(); + RealScalar xi = x.imag(); + // expm1(z) = exp(z) - 1 + // = exp(x + i * y) - 1 + // = exp(x) * (cos(y) + i * sin(y)) - 1 + // = exp(x) * cos(y) - 1 + i * exp(x) * sin(y) + // Imag(expm1(z)) = exp(x) * sin(y) + // Real(expm1(z)) = exp(x) * cos(y) - 1 + // = exp(x) * cos(y) - 1. + // = expm1(x) + exp(x) * (cos(y) - 1) + // = expm1(x) + exp(x) * (2 * sin(y / 2) ** 2) + RealScalar erm1 = numext::expm1(xr); + RealScalar er = erm1 + RealScalar(1.); + RealScalar sin2 = numext::sin(xi / RealScalar(2.)); + sin2 = sin2 * sin2; + RealScalar s = numext::sin(xi); + RealScalar real_part = erm1 - RealScalar(2.) * er * sin2; + return std::complex(real_part, er * s); + } +}; + +template +struct rsqrt_impl { + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE T run(const T& x) { return T(1) / numext::sqrt(x); } +}; + +#if defined(EIGEN_GPU_COMPILE_PHASE) +template +struct conj_impl, true> { + EIGEN_DEVICE_FUNC static inline std::complex run(const std::complex& x) { + return std::complex(numext::real(x), -numext::imag(x)); + } +}; +#endif + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_MATHFUNCTIONS_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/MathFunctionsImpl.h b/o-voxel/third_party/eigen/Eigen/src/Core/MathFunctionsImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..e41bd9fac13843118460b8d57723ed23910ade4d --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/MathFunctionsImpl.h @@ -0,0 +1,264 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2014 Pedro Gonnet (pedro.gonnet@gmail.com) +// Copyright (C) 2016 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATHFUNCTIONSIMPL_H +#define EIGEN_MATHFUNCTIONSIMPL_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +/** \internal Fast reciprocal using Newton-Raphson's method. + + Preconditions: + 1. The starting guess provided in approx_a_recip must have at least half + the leading mantissa bits in the correct result, such that a single + Newton-Raphson step is sufficient to get within 1-2 ulps of the correct + result. + 2. If a is zero, approx_a_recip must be infinite with the same sign as a. + 3. If a is infinite, approx_a_recip must be zero with the same sign as a. + + If the preconditions are satisfied, which they are for the _*_rcp_ps + instructions on x86, the result has a maximum relative error of 2 ulps, + and correctly handles reciprocals of zero, infinity, and NaN. +*/ +template +struct generic_reciprocal_newton_step { + static_assert(Steps > 0, "Steps must be at least 1."); + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& a, const Packet& approx_a_recip) { + using Scalar = typename unpacket_traits::type; + const Packet one = pset1(Scalar(1)); + // Refine the approximation using one Newton-Raphson step: + // x_{i} = x_{i-1} * (2 - a * x_{i-1}) + const Packet x = generic_reciprocal_newton_step::run(a, approx_a_recip); + const Packet tmp = pnmadd(a, x, one); + // If tmp is NaN, it means that a is either +/-0 or +/-Inf. + // In this case return the approximation directly. + const Packet is_not_nan = pcmp_eq(tmp, tmp); + // Use two FMAs instead of FMA+FMUL to improve precision. 
+ return pselect(is_not_nan, pmadd(x, tmp, x), x); + } +}; + +template +struct generic_reciprocal_newton_step { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& /*unused*/, const Packet& approx_rsqrt) { + return approx_rsqrt; + } +}; + +/** \internal Fast reciprocal sqrt using Newton-Raphson's method. + + Preconditions: + 1. The starting guess provided in approx_a_recip must have at least half + the leading mantissa bits in the correct result, such that a single + Newton-Raphson step is sufficient to get within 1-2 ulps of the correct + result. + 2. If a is zero, approx_a_recip must be infinite with the same sign as a. + 3. If a is infinite, approx_a_recip must be zero with the same sign as a. + + If the preconditions are satisfied, which they are for the _*_rcp_ps + instructions on x86, the result has a maximum relative error of 2 ulps, + and correctly handles zero, infinity, and NaN. Positive denormals are + treated as zero. +*/ +template +struct generic_rsqrt_newton_step { + static_assert(Steps > 0, "Steps must be at least 1."); + using Scalar = typename unpacket_traits::type; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& a, const Packet& approx_rsqrt) { + const Scalar kMinusHalf = Scalar(-1) / Scalar(2); + const Packet cst_minus_half = pset1(kMinusHalf); + const Packet cst_minus_one = pset1(Scalar(-1)); + + Packet inv_sqrt = approx_rsqrt; + for (int step = 0; step < Steps; ++step) { + // Refine the approximation using one Newton-Raphson step: + // h_n = (x * inv_sqrt) * inv_sqrt - 1 (so that h_n is nearly 0). 
+ // inv_sqrt = inv_sqrt - 0.5 * inv_sqrt * h_n + Packet r2 = pmul(a, inv_sqrt); + Packet half_r = pmul(inv_sqrt, cst_minus_half); + Packet h_n = pmadd(r2, inv_sqrt, cst_minus_one); + inv_sqrt = pmadd(half_r, h_n, inv_sqrt); + } + + // If x is NaN, then either: + // 1) the input is NaN + // 2) zero and infinity were multiplied + // In either of these cases, return approx_rsqrt + return pselect(pisnan(inv_sqrt), approx_rsqrt, inv_sqrt); + } +}; + +template +struct generic_rsqrt_newton_step { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& /*unused*/, const Packet& approx_rsqrt) { + return approx_rsqrt; + } +}; + +/** \internal Fast sqrt using Newton-Raphson's method. + + Preconditions: + 1. The starting guess for the reciprocal sqrt provided in approx_rsqrt must + have at least half the leading mantissa bits in the correct result, such + that a single Newton-Raphson step is sufficient to get within 1-2 ulps of + the correct result. + 2. If a is zero, approx_rsqrt must be infinite. + 3. If a is infinite, approx_rsqrt must be zero. + + If the preconditions are satisfied, which they are for the _*_rsqrt_ps + instructions on x86, the result has a maximum relative error of 2 ulps, + and correctly handles zero and infinity, and NaN. Positive denormal inputs + are treated as zero. +*/ +template +struct generic_sqrt_newton_step { + static_assert(Steps > 0, "Steps must be at least 1."); + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Packet run(const Packet& a, const Packet& approx_rsqrt) { + using Scalar = typename unpacket_traits::type; + const Packet one_point_five = pset1(Scalar(1.5)); + const Packet minus_half = pset1(Scalar(-0.5)); + // If a is inf or zero, return a directly. + const Packet inf_mask = pcmp_eq(a, pset1(NumTraits::infinity())); + const Packet return_a = por(pcmp_eq(a, pzero(a)), inf_mask); + // Do a single step of Newton's iteration for reciprocal square root: + // x_{n+1} = x_n * (1.5 + (-0.5 * x_n) * (a * x_n))). 
+ // The Newton's step is computed this way to avoid over/under-flows. + Packet rsqrt = pmul(approx_rsqrt, pmadd(pmul(minus_half, approx_rsqrt), pmul(a, approx_rsqrt), one_point_five)); + for (int step = 1; step < Steps; ++step) { + rsqrt = pmul(rsqrt, pmadd(pmul(minus_half, rsqrt), pmul(a, rsqrt), one_point_five)); + } + + // Return sqrt(x) = x * rsqrt(x) for non-zero finite positive arguments. + // Return a itself for 0 or +inf, NaN for negative arguments. + return pselect(return_a, a, pmul(a, rsqrt)); + } +}; + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE RealScalar positive_real_hypot(const RealScalar& x, const RealScalar& y) { + // IEEE IEC 6059 special cases. + if ((numext::isinf)(x) || (numext::isinf)(y)) return NumTraits::infinity(); + if ((numext::isnan)(x) || (numext::isnan)(y)) return NumTraits::quiet_NaN(); + + EIGEN_USING_STD(sqrt); + RealScalar p, qp; + p = numext::maxi(x, y); + if (numext::is_exactly_zero(p)) return RealScalar(0); + qp = numext::mini(y, x) / p; + return p * sqrt(RealScalar(1) + qp * qp); +} + +template +struct hypot_impl { + typedef typename NumTraits::Real RealScalar; + static EIGEN_DEVICE_FUNC inline RealScalar run(const Scalar& x, const Scalar& y) { + EIGEN_USING_STD(abs); + return positive_real_hypot(abs(x), abs(y)); + } +}; + +// Generic complex sqrt implementation that correctly handles corner cases +// according to https://en.cppreference.com/w/cpp/numeric/complex/sqrt +template +EIGEN_DEVICE_FUNC ComplexT complex_sqrt(const ComplexT& z) { + // Computes the principal sqrt of the input. + // + // For a complex square root of the number x + i*y. We want to find real + // numbers u and v such that + // (u + i*v)^2 = x + i*y <=> + // u^2 - v^2 + i*2*u*v = x + i*v. + // By equating the real and imaginary parts we get: + // u^2 - v^2 = x + // 2*u*v = y. 
+ // + // For x >= 0, this has the numerically stable solution + // u = sqrt(0.5 * (x + sqrt(x^2 + y^2))) + // v = y / (2 * u) + // and for x < 0, + // v = sign(y) * sqrt(0.5 * (-x + sqrt(x^2 + y^2))) + // u = y / (2 * v) + // + // Letting w = sqrt(0.5 * (|x| + |z|)), + // if x == 0: u = w, v = sign(y) * w + // if x > 0: u = w, v = y / (2 * w) + // if x < 0: u = |y| / (2 * w), v = sign(y) * w + using T = typename NumTraits::Real; + const T x = numext::real(z); + const T y = numext::imag(z); + const T zero = T(0); + const T w = numext::sqrt(T(0.5) * (numext::abs(x) + numext::hypot(x, y))); + + return (numext::isinf)(y) ? ComplexT(NumTraits::infinity(), y) + : numext::is_exactly_zero(x) ? ComplexT(w, y < zero ? -w : w) + : x > zero ? ComplexT(w, y / (2 * w)) + : ComplexT(numext::abs(y) / (2 * w), y < zero ? -w : w); +} + +// Generic complex rsqrt implementation. +template +EIGEN_DEVICE_FUNC ComplexT complex_rsqrt(const ComplexT& z) { + // Computes the principal reciprocal sqrt of the input. + // + // For a complex reciprocal square root of the number z = x + i*y. We want to + // find real numbers u and v such that + // (u + i*v)^2 = 1 / (x + i*y) <=> + // u^2 - v^2 + i*2*u*v = x/|z|^2 - i*v/|z|^2. + // By equating the real and imaginary parts we get: + // u^2 - v^2 = x/|z|^2 + // 2*u*v = y/|z|^2. 
+ // + // For x >= 0, this has the numerically stable solution + // u = sqrt(0.5 * (x + |z|)) / |z| + // v = -y / (2 * u * |z|) + // and for x < 0, + // v = -sign(y) * sqrt(0.5 * (-x + |z|)) / |z| + // u = -y / (2 * v * |z|) + // + // Letting w = sqrt(0.5 * (|x| + |z|)), + // if x == 0: u = w / |z|, v = -sign(y) * w / |z| + // if x > 0: u = w / |z|, v = -y / (2 * w * |z|) + // if x < 0: u = |y| / (2 * w * |z|), v = -sign(y) * w / |z| + using T = typename NumTraits::Real; + const T x = numext::real(z); + const T y = numext::imag(z); + const T zero = T(0); + + const T abs_z = numext::hypot(x, y); + const T w = numext::sqrt(T(0.5) * (numext::abs(x) + abs_z)); + const T woz = w / abs_z; + // Corner cases consistent with 1/sqrt(z) on gcc/clang. + return numext::is_exactly_zero(abs_z) ? ComplexT(NumTraits::infinity(), NumTraits::quiet_NaN()) + : ((numext::isinf)(x) || (numext::isinf)(y)) ? ComplexT(zero, zero) + : numext::is_exactly_zero(x) ? ComplexT(woz, y < zero ? woz : -woz) + : x > zero ? ComplexT(woz, -y / (2 * w * abs_z)) + : ComplexT(numext::abs(y) / (2 * w * abs_z), y < zero ? woz : -woz); +} + +template +EIGEN_DEVICE_FUNC ComplexT complex_log(const ComplexT& z) { + // Computes complex log. + using T = typename NumTraits::Real; + T a = numext::abs(z); + EIGEN_USING_STD(atan2); + T b = atan2(z.imag(), z.real()); + return ComplexT(numext::log(a), b); +} + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_MATHFUNCTIONSIMPL_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Matrix.h b/o-voxel/third_party/eigen/Eigen/src/Core/Matrix.h new file mode 100644 index 0000000000000000000000000000000000000000..4daf7176dca9aeee9e8eb2971254de73981d2bf2 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Matrix.h @@ -0,0 +1,534 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2006-2010 Benoit Jacob +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_MATRIX_H +#define EIGEN_MATRIX_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { +template +struct traits> { + private: + constexpr static int size = internal::size_at_compile_time(Rows_, Cols_); + typedef typename find_best_packet::type PacketScalar; + enum { + row_major_bit = Options_ & RowMajor ? RowMajorBit : 0, + is_dynamic_size_storage = MaxRows_ == Dynamic || MaxCols_ == Dynamic, + max_size = is_dynamic_size_storage ? Dynamic : MaxRows_ * MaxCols_, + default_alignment = compute_default_alignment::value, + actual_alignment = ((Options_ & DontAlign) == 0) ? default_alignment : 0, + required_alignment = unpacket_traits::alignment, + packet_access_bit = (packet_traits::Vectorizable && + (EIGEN_UNALIGNED_VECTORIZE || (int(actual_alignment) >= int(required_alignment)))) + ? PacketAccessBit + : 0 + }; + + public: + typedef Scalar_ Scalar; + typedef Dense StorageKind; + typedef Eigen::Index StorageIndex; + typedef MatrixXpr XprKind; + enum { + RowsAtCompileTime = Rows_, + ColsAtCompileTime = Cols_, + MaxRowsAtCompileTime = MaxRows_, + MaxColsAtCompileTime = MaxCols_, + Flags = compute_matrix_flags(Options_), + Options = Options_, + InnerStrideAtCompileTime = 1, + OuterStrideAtCompileTime = (int(Options) & int(RowMajor)) ? 
ColsAtCompileTime : RowsAtCompileTime, + + // FIXME, the following flag in only used to define NeedsToAlign in PlainObjectBase + EvaluatorFlags = LinearAccessBit | DirectAccessBit | packet_access_bit | row_major_bit, + Alignment = actual_alignment + }; +}; +} // namespace internal + +/** \class Matrix + * \ingroup Core_Module + * + * \brief The matrix class, also used for vectors and row-vectors + * + * The %Matrix class is the work-horse for all \em dense (\ref dense "note") matrices and vectors within Eigen. + * Vectors are matrices with one column, and row-vectors are matrices with one row. + * + * The %Matrix class encompasses \em both fixed-size and dynamic-size objects (\ref fixedsize "note"). + * + * The first three template parameters are required: + * \tparam Scalar_ Numeric type, e.g. float, double, int or std::complex. + * User defined scalar types are supported as well (see \ref user_defined_scalars "here"). + * \tparam Rows_ Number of rows, or \b Dynamic + * \tparam Cols_ Number of columns, or \b Dynamic + * + * The remaining template parameters are optional -- in most cases you don't have to worry about them. + * \tparam Options_ A combination of either \b #RowMajor or \b #ColMajor, and of either + * \b #AutoAlign or \b #DontAlign. + * The former controls \ref TopicStorageOrders "storage order", and defaults to column-major. The latter + * controls alignment, which is required for vectorization. It defaults to aligning matrices except for fixed sizes that + * aren't a multiple of the packet size. \tparam MaxRows_ Maximum number of rows. Defaults to \a Rows_ (\ref maxrows + * "note"). \tparam MaxCols_ Maximum number of columns. Defaults to \a Cols_ (\ref maxrows "note"). + * + * Eigen provides a number of typedefs covering the usual cases. 
Here are some examples: + * + * \li \c Matrix2d is a 2x2 square matrix of doubles (\c Matrix) + * \li \c Vector4f is a vector of 4 floats (\c Matrix) + * \li \c RowVector3i is a row-vector of 3 ints (\c Matrix) + * + * \li \c MatrixXf is a dynamic-size matrix of floats (\c Matrix) + * \li \c VectorXf is a dynamic-size vector of floats (\c Matrix) + * + * \li \c Matrix2Xf is a partially fixed-size (dynamic-size) matrix of floats (\c Matrix) + * \li \c MatrixX3d is a partially dynamic-size (fixed-size) matrix of double (\c Matrix) + * + * See \link matrixtypedefs this page \endlink for a complete list of predefined \em %Matrix and \em Vector typedefs. + * + * You can access elements of vectors and matrices using normal subscripting: + * + * \code + * Eigen::VectorXd v(10); + * v[0] = 0.1; + * v[1] = 0.2; + * v(0) = 0.3; + * v(1) = 0.4; + * + * Eigen::MatrixXi m(10, 10); + * m(0, 1) = 1; + * m(0, 2) = 2; + * m(0, 3) = 3; + * \endcode + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIX_PLUGIN. + * + * Some notes: + * + *
+ *
\anchor dense Dense versus sparse:
+ *
This %Matrix class handles dense, not sparse, matrices and vectors. For sparse matrices and vectors, see the + * Sparse module. + * + * Dense matrices and vectors are plain arrays of coefficients. All the coefficients are stored in an ordinary + * contiguous array. This is unlike sparse matrices and vectors, where the coefficients are stored as a list of nonzero + * coefficients.
+ * + *
\anchor fixedsize Fixed-size versus dynamic-size:
+ *
Fixed-size means that the numbers of rows and columns are known at compile-time. In this case, Eigen allocates + * the array of coefficients as a fixed-size array, as a class member. This makes sense for very small matrices, + * typically up to 4x4, sometimes up to 16x16. Larger matrices should be declared as dynamic-size even if one happens to + * know their size at compile-time. + * + * Dynamic-size means that the numbers of rows or columns are not necessarily known at compile-time. In this case they + * are runtime variables, and the array of coefficients is allocated dynamically on the heap. + * + * Note that \em dense matrices, be they Fixed-size or Dynamic-size, do not expand dynamically in the sense of + * a std::map. If you want this behavior, see the Sparse module.
+ * + *
\anchor maxrows MaxRows_ and MaxCols_:
+ *
In most cases, one just leaves these parameters at their default values. + * These parameters specify the maximum numbers of rows and columns that the matrix may have. They are useful in cases + * when the exact numbers of rows and columns are not known at compile-time, but it is known at compile-time that they + * cannot exceed a certain value. This happens when taking dynamic-size blocks inside fixed-size matrices: in this case + * MaxRows_ and MaxCols_ are the dimensions of the original matrix, while Rows_ and Cols_ are Dynamic.
+ *
+ * + * ABI and storage layout + * + * The table below summarizes the ABI of some possible Matrix instances which is fixed throughout the lifetime of Eigen 3. + * + * + * + * + * + *
Matrix type | Equivalent C structure
\code Matrix \endcode\code + * struct { + * T *data; // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0 + * Eigen::Index rows, cols; + * }; + * \endcode
\code + * Matrix + * Matrix \endcode\code + * struct { + * T *data; // with (size_t(data)%EIGEN_MAX_ALIGN_BYTES)==0 + * Eigen::Index size; + * }; + * \endcode
\code Matrix \endcode\code + * struct { + * T data[Rows*Cols]; // with (size_t(data)%A(Rows*Cols*sizeof(T)))==0 + * }; + * \endcode
\code Matrix \endcode\code + * struct { + * T data[MaxRows*MaxCols]; // with (size_t(data)%A(MaxRows*MaxCols*sizeof(T)))==0 + * Eigen::Index rows, cols; + * }; + * \endcode
+ * Note that in this table Rows, Cols, MaxRows and MaxCols are all positive integers. A(S) is defined to the largest + * possible power-of-two smaller to EIGEN_MAX_STATIC_ALIGN_BYTES. + * + * \see MatrixBase for the majority of the API methods for matrices, \ref TopicClassHierarchy, + * \ref TopicStorageOrders + */ + +template +class Matrix : public PlainObjectBase> { + public: + /** \brief Base class typedef. + * \sa PlainObjectBase + */ + typedef PlainObjectBase Base; + + enum { Options = Options_ }; + + EIGEN_DENSE_PUBLIC_INTERFACE(Matrix) + + typedef typename Base::PlainObject PlainObject; + + using Base::base; + using Base::coeffRef; + + /** + * \brief Assigns matrices to each other. + * + * \note This is a special case of the templated operator=. Its purpose is + * to prevent a default operator= from hiding the templated operator=. + * + * \callgraph + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix& operator=(const Matrix& other) { return Base::_set(other); } + + /** \internal + * \brief Copies the value of the expression \a other into \c *this with automatic resizing. + * + * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized), + * it will be initialized. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(const DenseBase& other) { + return Base::_set(other); + } + + /** + * \brief Copies the generic expression \a other into *this. 
+ * \copydetails DenseBase::operator=(const EigenBase &other) + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(const EigenBase& other) { + return Base::operator=(other); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix& operator=(const ReturnByValue& func) { + return Base::operator=(func); + } + + /** \brief Default constructor. + * + * For fixed-size matrices, does nothing. + * + * For dynamic-size matrices, creates an empty matrix of size 0. Does not allocate any array. Such a matrix + * is called a null matrix. This constructor is the unique way to create null matrices: resizing + * a matrix to 0 is not supported. + * + * \sa resize(Index,Index) + */ +#if defined(EIGEN_INITIALIZE_COEFFS) + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix() { EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED } +#else + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix() = default; +#endif + /** \brief Move constructor */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix(Matrix&&) = default; + /** \brief Moves the matrix into the other one. + * + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix& operator=(Matrix&& other) noexcept( + std::is_nothrow_move_assignable::value) { + Base::operator=(std::move(other)); + return *this; + } + + /** \brief Construct a row of column vector with fixed size from an arbitrary number of coefficients. + * + * \only_for_vectors + * + * This constructor is for 1D array or vectors with more than 4 coefficients. + * + * \warning To construct a column (resp. row) vector of fixed length, the number of values passed to this + * constructor must match the the fixed number of rows (resp. columns) of \c *this. 
+ * + * + * Example: \include Matrix_variadic_ctor_cxx11.cpp + * Output: \verbinclude Matrix_variadic_ctor_cxx11.out + * + * \sa Matrix(const std::initializer_list>&) + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, + const ArgTypes&... args) + : Base(a0, a1, a2, a3, args...) {} + + /** \brief Constructs a Matrix and initializes it from the coefficients given as initializer-lists grouped by row. + * \cpp11 + * \anchor matrix_initializer_list + * + * In the general case, the constructor takes a list of rows, each row being represented as a list of coefficients: + * + * Example: \include Matrix_initializer_list_23_cxx11.cpp + * Output: \verbinclude Matrix_initializer_list_23_cxx11.out + * + * Each of the inner initializer lists must contain the exact same number of elements, otherwise an assertion is + * triggered. + * + * In the case of a compile-time column vector, implicit transposition from a single row is allowed. + * Therefore VectorXd{{1,2,3,4,5}} is legal and the more verbose syntax + * RowVectorXd{{1},{2},{3},{4},{5}} can be avoided: + * + * Example: \include Matrix_initializer_list_vector_cxx11.cpp + * Output: \verbinclude Matrix_initializer_list_vector_cxx11.out + * + * In the case of fixed-sized matrices, the initializer list sizes must exactly match the matrix sizes, + * and implicit transposition is allowed for compile-time vectors only. + * + * \sa Matrix(const Scalar& a0, const Scalar& a1, const Scalar& a2, const Scalar& a3, const ArgTypes&... 
args) + */ + EIGEN_DEVICE_FUNC explicit constexpr EIGEN_STRONG_INLINE Matrix( + const std::initializer_list>& list) + : Base(list) {} + +#ifndef EIGEN_PARSED_BY_DOXYGEN + + // This constructor is for both 1x1 matrices and dynamic vectors + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit Matrix(const T& x) { + Base::template _init1(x); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const T0& x, const T1& y) { + Base::template _init2(x, y); + } + +#else + /** \brief Constructs a fixed-sized matrix initialized with coefficients starting at \a data */ + EIGEN_DEVICE_FUNC explicit Matrix(const Scalar* data); + + /** \brief Constructs a vector or row-vector with given dimension. \only_for_vectors + * + * This is useful for dynamic-size vectors. For fixed-size vectors, + * it is redundant to pass these parameters, so one should use the default constructor + * Matrix() instead. + * + * \warning This constructor is disabled for fixed-size \c 1x1 matrices. For instance, + * calling Matrix(1) will call the initialization constructor: Matrix(const Scalar&). + * For fixed-size \c 1x1 matrices it is therefore recommended to use the default + * constructor Matrix() instead, especially when using one of the non standard + * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives). + */ + EIGEN_STRONG_INLINE explicit Matrix(Index dim); + /** \brief Constructs an initialized 1x1 matrix with the given coefficient + * \sa Matrix(const Scalar&, const Scalar&, const Scalar&, const Scalar&, const ArgTypes&...) */ + Matrix(const Scalar& x); + /** \brief Constructs an uninitialized matrix with \a rows rows and \a cols columns. + * + * This is useful for dynamic-size matrices. For fixed-size matrices, + * it is redundant to pass these parameters, so one should use the default constructor + * Matrix() instead. + * + * \warning This constructor is disabled for fixed-size \c 1x2 and \c 2x1 vectors. 
For instance, + * calling Matrix2f(2,1) will call the initialization constructor: Matrix(const Scalar& x, const Scalar& y). + * For fixed-size \c 1x2 or \c 2x1 vectors it is therefore recommended to use the default + * constructor Matrix() instead, especially when using one of the non standard + * \c EIGEN_INITIALIZE_MATRICES_BY_{ZERO,\c NAN} macros (see \ref TopicPreprocessorDirectives). + */ + EIGEN_DEVICE_FUNC Matrix(Index rows, Index cols); + + /** \brief Constructs an initialized 2D vector with given coefficients + * \sa Matrix(const Scalar&, const Scalar&, const Scalar&, const Scalar&, const ArgTypes&...) */ + Matrix(const Scalar& x, const Scalar& y); +#endif // end EIGEN_PARSED_BY_DOXYGEN + + /** \brief Constructs an initialized 3D vector with given coefficients + * \sa Matrix(const Scalar&, const Scalar&, const Scalar&, const Scalar&, const ArgTypes&...) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z) { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 3) + m_storage.data()[0] = x; + m_storage.data()[1] = y; + m_storage.data()[2] = z; + } + /** \brief Constructs an initialized 4D vector with given coefficients + * \sa Matrix(const Scalar&, const Scalar&, const Scalar&, const Scalar&, const ArgTypes&...) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const Scalar& x, const Scalar& y, const Scalar& z, const Scalar& w) { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(Matrix, 4) + m_storage.data()[0] = x; + m_storage.data()[1] = y; + m_storage.data()[2] = z; + m_storage.data()[3] = w; + } + + /** \brief Copy constructor */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Matrix(const Matrix&) = default; + + /** \brief Copy constructor for generic expressions. 
+ * \sa MatrixBase::operator=(const EigenBase&) + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Matrix(const EigenBase& other) : Base(other.derived()) {} + + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return 1; } + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return this->innerSize(); } + + /////////// Geometry module /////////// + + template + EIGEN_DEVICE_FUNC explicit Matrix(const RotationBase& r); + template + EIGEN_DEVICE_FUNC Matrix& operator=(const RotationBase& r); + +// allow to extend Matrix outside Eigen +#ifdef EIGEN_MATRIX_PLUGIN +#include EIGEN_MATRIX_PLUGIN +#endif + + protected: + template + friend struct internal::conservative_resize_like_impl; + + using Base::m_storage; +}; + +/** \defgroup matrixtypedefs Global matrix typedefs + * + * \ingroup Core_Module + * + * %Eigen defines several typedef shortcuts for most common matrix and vector types. + * + * The general patterns are the following: + * + * \c MatrixSizeType where \c Size can be \c 2,\c 3,\c 4 for fixed size square matrices or \c X for dynamic size, + * and where \c Type can be \c i for integer, \c f for float, \c d for double, \c cf for complex float, \c cd + * for complex double. + * + * For example, \c Matrix3d is a fixed-size 3x3 matrix type of doubles, and \c MatrixXf is a dynamic-size matrix of + * floats. + * + * There are also \c VectorSizeType and \c RowVectorSizeType which are self-explanatory. For example, \c Vector4cf is + * a fixed-size vector of 4 complex floats. + * + * With \cpp11, template alias are also defined for common sizes. + * They follow the same pattern as above except that the scalar type suffix is replaced by a + * template parameter, i.e.: + * - `MatrixSize` where `Size` can be \c 2,\c 3,\c 4 for fixed size square matrices or \c X for dynamic size. + * - `MatrixXSize` and `MatrixSizeX` where `Size` can be \c 2,\c 3,\c 4 for hybrid dynamic/fixed matrices. 
+ * - `VectorSize` and `RowVectorSize` for column and row vectors. + * + * With \cpp11, you can also use fully generic column and row vector types: `Vector` and + * `RowVector`. + * + * \sa class Matrix + */ + +#define EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Size, SizeSuffix) \ + /** \ingroup matrixtypedefs */ \ + /** \brief `Size`×`Size` matrix of type `Type`. */ \ + typedef Matrix Matrix##SizeSuffix##TypeSuffix; \ + /** \ingroup matrixtypedefs */ \ + /** \brief `Size`×`1` vector of type `Type`. */ \ + typedef Matrix Vector##SizeSuffix##TypeSuffix; \ + /** \ingroup matrixtypedefs */ \ + /** \brief `1`×`Size` vector of type `Type`. */ \ + typedef Matrix RowVector##SizeSuffix##TypeSuffix; + +#define EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, Size) \ + /** \ingroup matrixtypedefs */ \ + /** \brief `Size`×`Dynamic` matrix of type `Type`. */ \ + typedef Matrix Matrix##Size##X##TypeSuffix; \ + /** \ingroup matrixtypedefs */ \ + /** \brief `Dynamic`×`Size` matrix of type `Type`. */ \ + typedef Matrix Matrix##X##Size##TypeSuffix; + +#define EIGEN_MAKE_TYPEDEFS_ALL_SIZES(Type, TypeSuffix) \ + EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 2, 2) \ + EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 3, 3) \ + EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, 4, 4) \ + EIGEN_MAKE_TYPEDEFS(Type, TypeSuffix, Dynamic, X) \ + EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 2) \ + EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 3) \ + EIGEN_MAKE_FIXED_TYPEDEFS(Type, TypeSuffix, 4) + +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(int, i) +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(float, f) +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(double, d) +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex, cf) +EIGEN_MAKE_TYPEDEFS_ALL_SIZES(std::complex, cd) + +#undef EIGEN_MAKE_TYPEDEFS_ALL_SIZES +#undef EIGEN_MAKE_TYPEDEFS +#undef EIGEN_MAKE_FIXED_TYPEDEFS + +#define EIGEN_MAKE_TYPEDEFS(Size, SizeSuffix) \ + /** \ingroup matrixtypedefs */ \ + /** \brief \cpp11 `Size`×`Size` matrix of type `Type`.*/ \ + template \ + using Matrix##SizeSuffix = Matrix; \ + /** \ingroup 
matrixtypedefs */ \ + /** \brief \cpp11 `Size`×`1` vector of type `Type`.*/ \ + template \ + using Vector##SizeSuffix = Matrix; \ + /** \ingroup matrixtypedefs */ \ + /** \brief \cpp11 `1`×`Size` vector of type `Type`.*/ \ + template \ + using RowVector##SizeSuffix = Matrix; + +#define EIGEN_MAKE_FIXED_TYPEDEFS(Size) \ + /** \ingroup matrixtypedefs */ \ + /** \brief \cpp11 `Size`×`Dynamic` matrix of type `Type` */ \ + template \ + using Matrix##Size##X = Matrix; \ + /** \ingroup matrixtypedefs */ \ + /** \brief \cpp11 `Dynamic`×`Size` matrix of type `Type`. */ \ + template \ + using Matrix##X##Size = Matrix; + +EIGEN_MAKE_TYPEDEFS(2, 2) +EIGEN_MAKE_TYPEDEFS(3, 3) +EIGEN_MAKE_TYPEDEFS(4, 4) +EIGEN_MAKE_TYPEDEFS(Dynamic, X) +EIGEN_MAKE_FIXED_TYPEDEFS(2) +EIGEN_MAKE_FIXED_TYPEDEFS(3) +EIGEN_MAKE_FIXED_TYPEDEFS(4) + +/** \ingroup matrixtypedefs + * \brief \cpp11 `Size`×`1` vector of type `Type`. */ +template +using Vector = Matrix; + +/** \ingroup matrixtypedefs + * \brief \cpp11 `1`×`Size` vector of type `Type`. */ +template +using RowVector = Matrix; + +#undef EIGEN_MAKE_TYPEDEFS +#undef EIGEN_MAKE_FIXED_TYPEDEFS + +} // end namespace Eigen + +#endif // EIGEN_MATRIX_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/MatrixBase.h b/o-voxel/third_party/eigen/Eigen/src/Core/MatrixBase.h new file mode 100644 index 0000000000000000000000000000000000000000..6993a7de5d361f48c22edba05006702a610ac463 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/MatrixBase.h @@ -0,0 +1,545 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2009 Benoit Jacob +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_MATRIXBASE_H +#define EIGEN_MATRIXBASE_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +/** \class MatrixBase + * \ingroup Core_Module + * + * \brief Base class for all dense matrices, vectors, and expressions + * + * This class is the base that is inherited by all matrix, vector, and related expression + * types. Most of the Eigen API is contained in this class, and its base classes. Other important + * classes for the Eigen API are Matrix, and VectorwiseOp. + * + * Note that some methods are defined in other modules such as the \ref LU_Module LU module + * for all functions related to matrix inversions. + * + * \tparam Derived is the derived type, e.g. a matrix type, or an expression, etc. + * + * When writing a function taking Eigen objects as argument, if you want your function + * to take as argument any matrix, vector, or expression, just let it take a + * MatrixBase argument. As an example, here is a function printFirstRow which, given + * a matrix, vector, or expression \a x, prints the first row of \a x. + * + * \code + template + void printFirstRow(const Eigen::MatrixBase& x) + { + cout << x.row(0) << endl; + } + * \endcode + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_MATRIXBASE_PLUGIN. 
+ * + * \sa \blank \ref TopicClassHierarchy + */ +template +class MatrixBase : public DenseBase { + public: +#ifndef EIGEN_PARSED_BY_DOXYGEN + typedef MatrixBase StorageBaseType; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + + typedef DenseBase Base; + using Base::ColsAtCompileTime; + using Base::Flags; + using Base::IsVectorAtCompileTime; + using Base::MaxColsAtCompileTime; + using Base::MaxRowsAtCompileTime; + using Base::MaxSizeAtCompileTime; + using Base::RowsAtCompileTime; + using Base::SizeAtCompileTime; + + using Base::coeff; + using Base::coeffRef; + using Base::cols; + using Base::const_cast_derived; + using Base::derived; + using Base::eval; + using Base::lazyAssign; + using Base::rows; + using Base::size; + using Base::operator-; + using Base::operator+=; + using Base::operator-=; + using Base::operator*=; + using Base::operator/=; + + typedef typename Base::CoeffReturnType CoeffReturnType; + typedef typename Base::ConstTransposeReturnType ConstTransposeReturnType; + typedef typename Base::RowXpr RowXpr; + typedef typename Base::ColXpr ColXpr; +#endif // not EIGEN_PARSED_BY_DOXYGEN + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** type of the equivalent square matrix */ + typedef Matrix + SquareMatrixType; +#endif // not EIGEN_PARSED_BY_DOXYGEN + + /** \returns the size of the main diagonal, which is min(rows(),cols()). + * \sa rows(), cols(), SizeAtCompileTime. 
*/ + EIGEN_DEVICE_FUNC inline Index diagonalSize() const { return (numext::mini)(rows(), cols()); } + + typedef typename Base::PlainObject PlainObject; + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal Represents a matrix with all coefficients equal to one another*/ + typedef CwiseNullaryOp, PlainObject> ConstantReturnType; + /** \internal the return type of MatrixBase::adjoint() */ + typedef std::conditional_t::IsComplex, + CwiseUnaryOp, ConstTransposeReturnType>, + ConstTransposeReturnType> + AdjointReturnType; + /** \internal Return type of eigenvalues() */ + typedef Matrix, internal::traits::ColsAtCompileTime, 1, ColMajor> + EigenvaluesReturnType; + /** \internal the return type of identity */ + typedef CwiseNullaryOp, PlainObject> IdentityReturnType; + /** \internal the return type of unit vectors */ + typedef Block, SquareMatrixType>, + internal::traits::RowsAtCompileTime, internal::traits::ColsAtCompileTime> + BasisReturnType; +#endif // not EIGEN_PARSED_BY_DOXYGEN + +#define EIGEN_CURRENT_STORAGE_BASE_CLASS Eigen::MatrixBase +#define EIGEN_DOC_UNARY_ADDONS(X, Y) +#include "../plugins/CommonCwiseBinaryOps.inc" +#include "../plugins/MatrixCwiseUnaryOps.inc" +#include "../plugins/MatrixCwiseBinaryOps.inc" +#ifdef EIGEN_MATRIXBASE_PLUGIN +#include EIGEN_MATRIXBASE_PLUGIN +#endif +#undef EIGEN_CURRENT_STORAGE_BASE_CLASS +#undef EIGEN_DOC_UNARY_ADDONS + + /** Special case of the template operator=, in order to prevent the compiler + * from generating a default operator= (issue hit with g++ 4.1) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const MatrixBase& other); + + // We cannot inherit here via Base::operator= since it is causing + // trouble with MSVC. 
+ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const DenseBase& other); + + template + EIGEN_DEVICE_FUNC Derived& operator=(const EigenBase& other); + + template + EIGEN_DEVICE_FUNC Derived& operator=(const ReturnByValue& other); + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator+=(const MatrixBase& other); + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator-=(const MatrixBase& other); + + template + EIGEN_DEVICE_FUNC const Product operator*(const MatrixBase& other) const; + + template + EIGEN_DEVICE_FUNC const Product lazyProduct( + const MatrixBase& other) const; + + template + Derived& operator*=(const EigenBase& other); + + template + void applyOnTheLeft(const EigenBase& other); + + template + void applyOnTheRight(const EigenBase& other); + + template + EIGEN_DEVICE_FUNC const Product operator*( + const DiagonalBase& diagonal) const; + + template + EIGEN_DEVICE_FUNC const Product operator*( + const SkewSymmetricBase& skew) const; + + template + EIGEN_DEVICE_FUNC typename ScalarBinaryOpTraits::Scalar, + typename internal::traits::Scalar>::ReturnType + dot(const MatrixBase& other) const; + + EIGEN_DEVICE_FUNC RealScalar squaredNorm() const; + EIGEN_DEVICE_FUNC RealScalar norm() const; + RealScalar stableNorm() const; + RealScalar blueNorm() const; + RealScalar hypotNorm() const; + EIGEN_DEVICE_FUNC const PlainObject normalized() const; + EIGEN_DEVICE_FUNC const PlainObject stableNormalized() const; + EIGEN_DEVICE_FUNC void normalize(); + EIGEN_DEVICE_FUNC void stableNormalize(); + + EIGEN_DEVICE_FUNC const AdjointReturnType adjoint() const; + EIGEN_DEVICE_FUNC void adjointInPlace(); + + typedef Diagonal DiagonalReturnType; + EIGEN_DEVICE_FUNC DiagonalReturnType diagonal(); + + typedef Diagonal ConstDiagonalReturnType; + EIGEN_DEVICE_FUNC const ConstDiagonalReturnType diagonal() const; + + template + EIGEN_DEVICE_FUNC Diagonal diagonal(); + + template + EIGEN_DEVICE_FUNC const Diagonal diagonal() const; 
+ + EIGEN_DEVICE_FUNC Diagonal diagonal(Index index); + EIGEN_DEVICE_FUNC const Diagonal diagonal(Index index) const; + + template + struct TriangularViewReturnType { + typedef TriangularView Type; + }; + template + struct ConstTriangularViewReturnType { + typedef const TriangularView Type; + }; + + template + EIGEN_DEVICE_FUNC typename TriangularViewReturnType::Type triangularView(); + template + EIGEN_DEVICE_FUNC typename ConstTriangularViewReturnType::Type triangularView() const; + + template + struct SelfAdjointViewReturnType { + typedef SelfAdjointView Type; + }; + template + struct ConstSelfAdjointViewReturnType { + typedef const SelfAdjointView Type; + }; + + template + EIGEN_DEVICE_FUNC typename SelfAdjointViewReturnType::Type selfadjointView(); + template + EIGEN_DEVICE_FUNC typename ConstSelfAdjointViewReturnType::Type selfadjointView() const; + + const SparseView sparseView( + const Scalar& m_reference = Scalar(0), + const typename NumTraits::Real& m_epsilon = NumTraits::dummy_precision()) const; + EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(); + EIGEN_DEVICE_FUNC static const IdentityReturnType Identity(Index rows, Index cols); + EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index size, Index i); + EIGEN_DEVICE_FUNC static const BasisReturnType Unit(Index i); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitX(); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitY(); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitZ(); + EIGEN_DEVICE_FUNC static const BasisReturnType UnitW(); + + EIGEN_DEVICE_FUNC const DiagonalWrapper asDiagonal() const; + const PermutationWrapper asPermutation() const; + EIGEN_DEVICE_FUNC const SkewSymmetricWrapper asSkewSymmetric() const; + + EIGEN_DEVICE_FUNC Derived& setIdentity(); + EIGEN_DEVICE_FUNC Derived& setIdentity(Index rows, Index cols); + EIGEN_DEVICE_FUNC Derived& setUnit(Index i); + EIGEN_DEVICE_FUNC Derived& setUnit(Index newSize, Index i); + + bool isIdentity(const RealScalar& prec = 
NumTraits::dummy_precision()) const; + bool isDiagonal(const RealScalar& prec = NumTraits::dummy_precision()) const; + + bool isUpperTriangular(const RealScalar& prec = NumTraits::dummy_precision()) const; + bool isLowerTriangular(const RealScalar& prec = NumTraits::dummy_precision()) const; + + bool isSkewSymmetric(const RealScalar& prec = NumTraits::dummy_precision()) const; + + template + bool isOrthogonal(const MatrixBase& other, + const RealScalar& prec = NumTraits::dummy_precision()) const; + bool isUnitary(const RealScalar& prec = NumTraits::dummy_precision()) const; + + /** \returns true if \c *this and \a other have the same dimensions and all their coefficients are exactly equal. + * \warning When using floating point scalar values you probably should rather use a + * fuzzy comparison such as isApprox() + * \sa isApprox(), operator!= */ + template + EIGEN_DEVICE_FUNC inline bool operator==(const MatrixBase& other) const { + return (this->rows() == other.rows()) && (this->cols() == other.cols()) && cwiseEqual(other).all(); + } + + /** \returns true if \c *this and \a other differ in dimensions or if at least one pair of their coefficients is not exactly equal. + * \warning When using floating point scalar values you probably should rather use a + * fuzzy comparison such as isApprox() + * \sa isApprox(), operator== */ + template + EIGEN_DEVICE_FUNC inline bool operator!=(const MatrixBase& other) const { + return !(*this == other); + } + + NoAlias EIGEN_DEVICE_FUNC noalias(); + + // TODO forceAlignedAccess is temporarily disabled + // Need to find a nicer workaround. 
+ inline const Derived& forceAlignedAccess() const { return derived(); } + inline Derived& forceAlignedAccess() { return derived(); } + template + inline const Derived& forceAlignedAccessIf() const { + return derived(); + } + template + inline Derived& forceAlignedAccessIf() { + return derived(); + } + + EIGEN_DEVICE_FUNC Scalar trace() const; + + template + EIGEN_DEVICE_FUNC RealScalar lpNorm() const; + + EIGEN_DEVICE_FUNC MatrixBase& matrix() { return *this; } + EIGEN_DEVICE_FUNC const MatrixBase& matrix() const { return *this; } + + /** \returns an \link Eigen::ArrayBase Array \endlink expression of this matrix + * \sa ArrayBase::matrix() */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ArrayWrapper array() { return ArrayWrapper(derived()); } + /** \returns a const \link Eigen::ArrayBase Array \endlink expression of this matrix + * \sa ArrayBase::matrix() */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const ArrayWrapper array() const { + return ArrayWrapper(derived()); + } + + /////////// LU module /////////// + + template + inline const FullPivLU fullPivLu() const; + template + inline const PartialPivLU partialPivLu() const; + + template + inline const PartialPivLU lu() const; + + EIGEN_DEVICE_FUNC inline const Inverse inverse() const; + + template + inline void computeInverseAndDetWithCheck( + ResultType& inverse, typename ResultType::Scalar& determinant, bool& invertible, + const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision()) const; + + template + inline void computeInverseWithCheck( + ResultType& inverse, bool& invertible, + const RealScalar& absDeterminantThreshold = NumTraits::dummy_precision()) const; + + EIGEN_DEVICE_FUNC Scalar determinant() const; + + /////////// Cholesky module /////////// + + inline const LLT llt() const; + inline const LDLT ldlt() const; + + /////////// QR module /////////// + + inline const HouseholderQR householderQr() const; + template + inline const ColPivHouseholderQR colPivHouseholderQr() const; + template + inline 
const FullPivHouseholderQR fullPivHouseholderQr() const; + template + inline const CompleteOrthogonalDecomposition completeOrthogonalDecomposition() const; + + /////////// Eigenvalues module /////////// + + inline EigenvaluesReturnType eigenvalues() const; + inline RealScalar operatorNorm() const; + + /////////// SVD module /////////// + + template + inline JacobiSVD jacobiSvd() const; + template + EIGEN_DEPRECATED_WITH_REASON("Options should be specified using method's template parameter.") + inline JacobiSVD jacobiSvd(unsigned int computationOptions) const; + + template + inline BDCSVD bdcSvd() const; + template + EIGEN_DEPRECATED_WITH_REASON("Options should be specified using method's template parameter.") + inline BDCSVD bdcSvd(unsigned int computationOptions) const; + + /////////// Geometry module /////////// + + template + EIGEN_DEVICE_FUNC inline typename internal::cross_impl::return_type cross( + const MatrixBase& other) const; + + template + EIGEN_DEVICE_FUNC inline PlainObject cross3(const MatrixBase& other) const; + + EIGEN_DEVICE_FUNC inline PlainObject unitOrthogonal(void) const; + + EIGEN_DEPRECATED_WITH_REASON("Use .canonicalEulerAngles() instead.") + EIGEN_DEVICE_FUNC inline Matrix eulerAngles(Index a0, Index a1, Index a2) const; + + EIGEN_DEVICE_FUNC inline Matrix canonicalEulerAngles(Index a0, Index a1, Index a2) const; + + // put this as separate enum value to work around possible GCC 4.3 bug (?) + enum { + HomogeneousReturnTypeDirection = + ColsAtCompileTime == 1 && RowsAtCompileTime == 1 + ? ((internal::traits::Flags & RowMajorBit) == RowMajorBit ? Horizontal : Vertical) + : ColsAtCompileTime == 1 ? Vertical + : Horizontal + }; + typedef Homogeneous HomogeneousReturnType; + EIGEN_DEVICE_FUNC inline HomogeneousReturnType homogeneous() const; + + enum { SizeMinusOne = SizeAtCompileTime == Dynamic ? Dynamic : SizeAtCompileTime - 1 }; + typedef Block::ColsAtCompileTime == 1 ? SizeMinusOne : 1, + internal::traits::ColsAtCompileTime == 1 ? 
1 : SizeMinusOne> + ConstStartMinusOne; + typedef EIGEN_EXPR_BINARYOP_SCALAR_RETURN_TYPE(ConstStartMinusOne, Scalar, quotient) HNormalizedReturnType; + EIGEN_DEVICE_FUNC inline const HNormalizedReturnType hnormalized() const; + + ////////// Householder module /////////// + + EIGEN_DEVICE_FUNC void makeHouseholderInPlace(Scalar& tau, RealScalar& beta); + template + EIGEN_DEVICE_FUNC void makeHouseholder(EssentialPart& essential, Scalar& tau, RealScalar& beta) const; + template + EIGEN_DEVICE_FUNC void applyHouseholderOnTheLeft(const EssentialPart& essential, const Scalar& tau, + Scalar* workspace); + template + EIGEN_DEVICE_FUNC void applyHouseholderOnTheRight(const EssentialPart& essential, const Scalar& tau, + Scalar* workspace); + + ///////// Jacobi module ///////// + + template + EIGEN_DEVICE_FUNC void applyOnTheLeft(Index p, Index q, const JacobiRotation& j); + template + EIGEN_DEVICE_FUNC void applyOnTheRight(Index p, Index q, const JacobiRotation& j); + + ///////// SparseCore module ///////// + + template + EIGEN_STRONG_INLINE const typename SparseMatrixBase::template CwiseProductDenseReturnType::Type + cwiseProduct(const SparseMatrixBase& other) const { + return other.cwiseProduct(derived()); + } + + ///////// MatrixFunctions module ///////// + + typedef typename internal::stem_function::type StemFunction; +#define EIGEN_MATRIX_FUNCTION(ReturnType, Name, Description) \ + /** \returns an expression of the matrix Description of \c *this. \brief This function requires the unsupported MatrixFunctions module. To compute the \ + * coefficient-wise Description use ArrayBase::##Name . */ \ + const ReturnType Name() const; +#define EIGEN_MATRIX_FUNCTION_1(ReturnType, Name, Description, Argument) \ + /** \returns an expression of the matrix Description of \c *this. \brief This function requires the unsupported MatrixFunctions module. To compute the \ + * coefficient-wise Description use ArrayBase::##Name . 
*/ \ + const ReturnType Name(Argument) const; + + EIGEN_MATRIX_FUNCTION(MatrixExponentialReturnValue, exp, exponential) + /** \brief Helper function for the unsupported + * MatrixFunctions module.*/ + const MatrixFunctionReturnValue matrixFunction(StemFunction f) const; + EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cosh, hyperbolic cosine) + EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sinh, hyperbolic sine) + EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, atanh, inverse hyperbolic tangent) + EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, acosh, inverse hyperbolic cosine) + EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, asinh, inverse hyperbolic sine) + EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, cos, cosine) + EIGEN_MATRIX_FUNCTION(MatrixFunctionReturnValue, sin, sine) + EIGEN_MATRIX_FUNCTION(MatrixSquareRootReturnValue, sqrt, square root) + EIGEN_MATRIX_FUNCTION(MatrixLogarithmReturnValue, log, logarithm) + EIGEN_MATRIX_FUNCTION_1(MatrixPowerReturnValue, pow, power to \c p, const RealScalar& p) + EIGEN_MATRIX_FUNCTION_1(MatrixComplexPowerReturnValue, pow, power to \c p, const internal::make_complex_t& p) + + protected: + EIGEN_DEFAULT_COPY_CONSTRUCTOR(MatrixBase) + EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(MatrixBase) + + private: + EIGEN_DEVICE_FUNC explicit MatrixBase(int); + EIGEN_DEVICE_FUNC MatrixBase(int, int); + template + EIGEN_DEVICE_FUNC explicit MatrixBase(const MatrixBase&); + + protected: + // mixing arrays and matrices is not legal: the asserted condition (sizeof == -1) can never hold, so this overload is meant to be rejected at compile time + template + Derived& operator+=(const ArrayBase&) { + EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar)) == -1, + YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); + return *this; + } + // mixing arrays and matrices is not legal: the asserted condition (sizeof == -1) can never hold, so this overload is meant to be rejected at compile time + template + Derived& operator-=(const ArrayBase&) { + EIGEN_STATIC_ASSERT(std::ptrdiff_t(sizeof(typename OtherDerived::Scalar)) == -1, + YOU_CANNOT_MIX_ARRAYS_AND_MATRICES); + return *this; + } +}; + 
+/*************************************************************************** + * Implementation of matrix base methods + ***************************************************************************/ + +/** replaces \c *this by \c *this * \a other. + * + * \returns a reference to \c *this + * + * Example: \include MatrixBase_applyOnTheRight.cpp + * Output: \verbinclude MatrixBase_applyOnTheRight.out + */ +template +template +inline Derived& MatrixBase::operator*=(const EigenBase& other) { + other.derived().applyThisOnTheRight(derived()); + return derived(); +} + +/** replaces \c *this by \c *this * \a other. It is equivalent to MatrixBase::operator*=(). + * + * Example: \include MatrixBase_applyOnTheRight.cpp + * Output: \verbinclude MatrixBase_applyOnTheRight.out + */ +template +template +inline void MatrixBase::applyOnTheRight(const EigenBase& other) { + other.derived().applyThisOnTheRight(derived()); +} + +/** replaces \c *this by \a other * \c *this. + * + * Example: \include MatrixBase_applyOnTheLeft.cpp + * Output: \verbinclude MatrixBase_applyOnTheLeft.out + */ +template +template +inline void MatrixBase::applyOnTheLeft(const EigenBase& other) { + other.derived().applyThisOnTheLeft(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_MATRIXBASE_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/NestByValue.h b/o-voxel/third_party/eigen/Eigen/src/Core/NestByValue.h new file mode 100644 index 0000000000000000000000000000000000000000..1209de1a35ba9cd9bc9d705384c1f252c8a72ad0 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/NestByValue.h @@ -0,0 +1,91 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_NESTBYVALUE_H +#define EIGEN_NESTBYVALUE_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { +template +struct traits > : public traits { + enum { Flags = traits::Flags & ~NestByRefBit }; +}; +} // namespace internal + +/** \class NestByValue + * \ingroup Core_Module + * + * \brief Expression which must be nested by value + * + * \tparam ExpressionType the type of the object of which we are requiring nesting-by-value + * + * This class is the return type of MatrixBase::nestByValue() + * and most of the time this is the only way it is used. + * + * \sa MatrixBase::nestByValue() + */ +template +class NestByValue : public internal::dense_xpr_base >::type { + public: + typedef typename internal::dense_xpr_base::type Base; + static constexpr bool HasDirectAccess = internal::has_direct_access::ret; + + EIGEN_DENSE_PUBLIC_INTERFACE(NestByValue) + + EIGEN_DEVICE_FUNC explicit inline NestByValue(const ExpressionType& matrix) : m_expression(matrix) {} + + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_expression.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_expression.cols(); } + + EIGEN_DEVICE_FUNC operator const ExpressionType&() const { return m_expression; } + + EIGEN_DEVICE_FUNC const ExpressionType& nestedExpression() const { return m_expression; } + + EIGEN_DEVICE_FUNC typename std::enable_if::type data() const { + return m_expression.data(); + } + + EIGEN_DEVICE_FUNC typename std::enable_if::type innerStride() const { + return m_expression.innerStride(); + } + + EIGEN_DEVICE_FUNC typename std::enable_if::type outerStride() const { + return m_expression.outerStride(); + } + + protected: + const ExpressionType m_expression; +}; + +/** \returns an expression of the temporary version of *this. 
+ */ +template +EIGEN_DEVICE_FUNC inline const NestByValue DenseBase::nestByValue() const { + return NestByValue(derived()); +} + +namespace internal { + +// Evaluator of NestByValue -> forwards to the evaluator of the nested expression +template +struct evaluator > : public evaluator { + typedef evaluator Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const NestByValue& xpr) : Base(xpr.nestedExpression()) {} +}; +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_NESTBYVALUE_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/NoAlias.h b/o-voxel/third_party/eigen/Eigen/src/Core/NoAlias.h new file mode 100644 index 0000000000000000000000000000000000000000..64d8c562942363a02ca04322dd1eb64e126456b9 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/NoAlias.h @@ -0,0 +1,102 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_NOALIAS_H +#define EIGEN_NOALIAS_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +/** \class NoAlias + * \ingroup Core_Module + * + * \brief Pseudo expression providing an operator = assuming no aliasing + * + * \tparam ExpressionType the type of the object on which to do the lazy assignment + * + * This class represents an expression with special assignment operators + * assuming no aliasing between the target expression and the source expression. + * More precisely, it allows bypassing the EvalBeforeAssignBit flag of the source expression. + * It is the return type of MatrixBase::noalias() + * and most of the time this is the only way it is used. 
+ * + * \sa MatrixBase::noalias() + */ +template class StorageBase> +class NoAlias { + public: + typedef typename ExpressionType::Scalar Scalar; + + EIGEN_DEVICE_FUNC explicit NoAlias(ExpressionType& expression) : m_expression(expression) {} + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator=(const StorageBase& other) { + call_assignment_no_alias(m_expression, other.derived(), + internal::assign_op()); + return m_expression; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator+=(const StorageBase& other) { + call_assignment_no_alias(m_expression, other.derived(), + internal::add_assign_op()); + return m_expression; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ExpressionType& operator-=(const StorageBase& other) { + call_assignment_no_alias(m_expression, other.derived(), + internal::sub_assign_op()); + return m_expression; + } + + EIGEN_DEVICE_FUNC ExpressionType& expression() const { return m_expression; } + + protected: + ExpressionType& m_expression; +}; + +/** \returns a pseudo expression of \c *this with an operator= assuming + * no aliasing between \c *this and the source expression. + * + * More precisely, noalias() allows to bypass the EvalBeforeAssignBit flag. + * Currently, even though several expressions may alias, only product + * expressions have this flag. Therefore, noalias() is only useful when + * the source expression contains a matrix product. + * + * Here are some examples where noalias is useful: + * \code + * D.noalias() = A * B; + * D.noalias() += A.transpose() * B; + * D.noalias() -= 2 * A * B.adjoint(); + * \endcode + * + * On the other hand the following example will lead to a \b wrong result: + * \code + * A.noalias() = A * B; + * \endcode + * because the result matrix A is also an operand of the matrix product. 
Therefore, + * there is no alternative than evaluating A * B in a temporary, that is the default + * behavior when you write: + * \code + * A = A * B; + * \endcode + * + * \sa class NoAlias + */ +template +NoAlias EIGEN_DEVICE_FUNC MatrixBase::noalias() { + return NoAlias(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_NOALIAS_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/NumTraits.h b/o-voxel/third_party/eigen/Eigen/src/Core/NumTraits.h new file mode 100644 index 0000000000000000000000000000000000000000..505d5b1c9bc29844029e24d1981644ef372f7196 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/NumTraits.h @@ -0,0 +1,335 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_NUMTRAITS_H +#define EIGEN_NUMTRAITS_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +// default implementation of digits(), based on numeric_limits if specialized, +// 0 for integer types, and log2(epsilon()) otherwise. 
+template ::is_specialized, + bool is_integer = NumTraits::IsInteger> +struct default_digits_impl { + EIGEN_DEVICE_FUNC constexpr static int run() { return std::numeric_limits::digits; } +}; + +template +struct default_digits_impl // Floating point +{ + EIGEN_DEVICE_FUNC constexpr static int run() { + using std::ceil; + using std::log2; + typedef typename NumTraits::Real Real; + return int(ceil(-log2(NumTraits::epsilon()))); + } +}; + +template +struct default_digits_impl // Integer +{ + EIGEN_DEVICE_FUNC constexpr static int run() { return 0; } +}; + +// default implementation of digits10(), based on numeric_limits if specialized, +// 0 for integer types, and floor((digits()-1)*log10(2)) otherwise. +template ::is_specialized, + bool is_integer = NumTraits::IsInteger> +struct default_digits10_impl { + EIGEN_DEVICE_FUNC constexpr static int run() { return std::numeric_limits::digits10; } +}; + +template +struct default_digits10_impl // Floating point +{ + EIGEN_DEVICE_FUNC constexpr static int run() { + using std::floor; + using std::log10; + typedef typename NumTraits::Real Real; + return int(floor((internal::default_digits_impl::run() - 1) * log10(2))); + } +}; + +template +struct default_digits10_impl // Integer +{ + EIGEN_DEVICE_FUNC constexpr static int run() { return 0; } +}; + +// default implementation of max_digits10(), based on numeric_limits if specialized, +// 0 for integer types, and log10(2) * digits() + 1 otherwise. 
+template ::is_specialized, + bool is_integer = NumTraits::IsInteger> +struct default_max_digits10_impl { + EIGEN_DEVICE_FUNC constexpr static int run() { return std::numeric_limits::max_digits10; } +}; + +template +struct default_max_digits10_impl // Floating point +{ + EIGEN_DEVICE_FUNC constexpr static int run() { + using std::ceil; + using std::log10; + typedef typename NumTraits::Real Real; + return int(ceil(internal::default_digits_impl::run() * log10(2) + 1)); + } +}; + +template +struct default_max_digits10_impl // Integer +{ + EIGEN_DEVICE_FUNC constexpr static int run() { return 0; } +}; + +} // end namespace internal + +namespace numext { + +/** \internal bit-wise cast without changing the underlying bit representation. */ +#if defined(__cpp_lib_bit_cast) && __cpp_lib_bit_cast >= 201806L +template +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Tgt bit_cast(const Src& src) { + return std::bit_cast(src); +} +#elif EIGEN_HAS_BUILTIN(__builtin_bit_cast) +template +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC constexpr Tgt bit_cast(const Src& src) { + EIGEN_STATIC_ASSERT(std::is_trivially_copyable::value, THIS_TYPE_IS_NOT_SUPPORTED) + EIGEN_STATIC_ASSERT(std::is_trivially_copyable::value, THIS_TYPE_IS_NOT_SUPPORTED) + EIGEN_STATIC_ASSERT(sizeof(Src) == sizeof(Tgt), THIS_TYPE_IS_NOT_SUPPORTED) + return __builtin_bit_cast(Tgt, src); +} +#else +template +EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC Tgt bit_cast(const Src& src) { + // The behaviour of memcpy is not specified for non-trivially copyable types + EIGEN_STATIC_ASSERT(std::is_trivially_copyable::value, THIS_TYPE_IS_NOT_SUPPORTED) + EIGEN_STATIC_ASSERT(std::is_trivially_copyable::value && std::is_default_constructible::value, + THIS_TYPE_IS_NOT_SUPPORTED) + EIGEN_STATIC_ASSERT(sizeof(Src) == sizeof(Tgt), THIS_TYPE_IS_NOT_SUPPORTED) + + Tgt tgt; + // Load src into registers first. This allows the memcpy to be elided by CUDA. 
+ const Src staged = src; + EIGEN_USING_STD(memcpy) + memcpy(static_cast(&tgt), static_cast(&staged), sizeof(Tgt)); + return tgt; +} +#endif +} // namespace numext + +// clang-format off +/** \class NumTraits + * \ingroup Core_Module + * + * \brief Holds information about the various numeric (i.e. scalar) types allowed by Eigen. + * + * \tparam T the numeric type at hand + * + * This class stores enums, typedefs and static methods giving information about a numeric type. + * + * The provided data consists of: + * \li A typedef \c Real, giving the "real part" type of \a T. If \a T is already real, + * then \c Real is just a typedef to \a T. If \a T is `std::complex` then \c Real + * is a typedef to \a U. + * \li A typedef \c NonInteger, giving the type that should be used for operations producing non-integral values, + * such as quotients, square roots, etc. If \a T is a floating-point type, then this typedef just gives + * \a T again. Note however that many Eigen functions such as internal::sqrt simply refuse to + * take integers. Outside of a few cases, Eigen doesn't do automatic type promotion. Thus, this typedef is + * only intended as a helper for code that needs to explicitly promote types. + * \li A typedef \c Literal giving the type to use for numeric literals such as "2" or "0.5". For instance, for + * `std::complex`, Literal is defined as \a U. Of course, this type must be fully compatible with \a T. In doubt, + * just use \a T here. + * \li A typedef \c Nested giving the type to use to nest a value inside of the expression tree. If you don't know what + * this means, just use \a T here. + * \li An enum value \c IsComplex. It is equal to 1 if \a T is a \c std::complex type, and to 0 otherwise. + * \li An enum value \c IsInteger. It is equal to \c 1 if \a T is an integer type such as \c int, and to \c 0 otherwise. 
+ * \li Enum values \c ReadCost, \c AddCost and \c MulCost representing a rough estimate of the number of CPU cycles needed by + * move / add / mul instructions respectively, assuming the data is already stored in CPU registers. Stay vague here. + * No need to do architecture-specific stuff. If you don't know what this means, just use \c Eigen::HugeCost. + * \li An enum value \c IsSigned. It is equal to \c 1 if \a T is a signed type and to 0 if \a T is unsigned. + * \li An enum value \c RequireInitialization. It is equal to \c 1 if the constructor of the numeric type \a T must be + * called, and to 0 if it is safe not to call it. Default is 0 if \a T is an arithmetic type, and 1 otherwise. + * \li An epsilon() function which, unlike + * `std::numeric_limits::epsilon()`, returns a \c Real instead of a \a T. + * \li A dummy_precision() function returning a weak epsilon value. It is mainly used as a default value by the fuzzy + * comparison operators. + * \li highest() and lowest() functions returning the highest and lowest possible values respectively. + * \li digits() function returning the number of radix digits (non-sign digits for integers, mantissa for floating-point). + * This is the analogue of + * `std::numeric_limits::digits` which is used as the default implementation if specialized. + * \li digits10() function returning the number of decimal digits that can be represented without change. This is the + * analogue of + * `std::numeric_limits::digits10` which is used as the default implementation if specialized. + * \li max_digits10() function returning the number of decimal digits required to uniquely represent all distinct values + * of the type. This is the analogue of `std::numeric_limits::max_digits10` + * which is used as the default implementation if specialized. 
+ * \li min_exponent() and max_exponent() functions returning the lowest and highest possible values, respectively, + * such that the radix raised to the power exponent-1 is a normalized floating-point number. These are equivalent + * to + * `std::numeric_limits::min_exponent`/`std::numeric_limits::max_exponent`. + * \li infinity() function returning a representation of positive infinity, if available. + * \li quiet_NaN() function returning a non-signaling "not-a-number", if available. + */ +// clang-format on +template +struct GenericNumTraits { + enum { + IsInteger = std::numeric_limits::is_integer, + IsSigned = std::numeric_limits::is_signed, + IsComplex = 0, + RequireInitialization = internal::is_arithmetic::value ? 0 : 1, + ReadCost = 1, + AddCost = 1, + MulCost = 1 + }; + + typedef T Real; + typedef std::conditional_t, T> NonInteger; + typedef T Nested; + typedef T Literal; + + EIGEN_DEVICE_FUNC constexpr static Real epsilon() { return numext::numeric_limits::epsilon(); } + + EIGEN_DEVICE_FUNC constexpr static int digits10() { return internal::default_digits10_impl::run(); } + + EIGEN_DEVICE_FUNC constexpr static int max_digits10() { return internal::default_max_digits10_impl::run(); } + + EIGEN_DEVICE_FUNC constexpr static int digits() { return internal::default_digits_impl::run(); } + + EIGEN_DEVICE_FUNC constexpr static int min_exponent() { return numext::numeric_limits::min_exponent; } + + EIGEN_DEVICE_FUNC constexpr static int max_exponent() { return numext::numeric_limits::max_exponent; } + + EIGEN_DEVICE_FUNC constexpr static Real dummy_precision() { + // make sure to override this for floating-point types + return Real(0); + } + + EIGEN_DEVICE_FUNC constexpr static T highest() { return (numext::numeric_limits::max)(); } + + EIGEN_DEVICE_FUNC constexpr static T lowest() { return (numext::numeric_limits::lowest)(); } + + EIGEN_DEVICE_FUNC constexpr static T infinity() { return numext::numeric_limits::infinity(); } + + EIGEN_DEVICE_FUNC constexpr static 
T quiet_NaN() { return numext::numeric_limits::quiet_NaN(); } +}; + +template +struct NumTraits : GenericNumTraits {}; + +template <> +struct NumTraits : GenericNumTraits { + EIGEN_DEVICE_FUNC constexpr static float dummy_precision() { return 1e-5f; } +}; + +template <> +struct NumTraits : GenericNumTraits { + EIGEN_DEVICE_FUNC constexpr static double dummy_precision() { return 1e-12; } +}; + +// GPU devices treat `long double` as `double`. +#ifndef EIGEN_GPU_COMPILE_PHASE +template <> +struct NumTraits : GenericNumTraits { + EIGEN_DEVICE_FUNC constexpr static long double dummy_precision() { return static_cast(1e-15l); } + +#if defined(EIGEN_ARCH_PPC) && (__LDBL_MANT_DIG__ == 106) + // PowerPC double double causes issues with some values + EIGEN_DEVICE_FUNC constexpr static long double epsilon() { + // 2^(-(__LDBL_MANT_DIG__)+1) + return static_cast(2.4651903288156618919116517665087e-32l); + } +#endif +}; +#endif + +template +struct NumTraits > : GenericNumTraits > { + typedef Real_ Real; + typedef typename NumTraits::Literal Literal; + enum { + IsComplex = 1, + IsSigned = NumTraits::IsSigned, + RequireInitialization = NumTraits::RequireInitialization, + ReadCost = 2 * NumTraits::ReadCost, + AddCost = 2 * NumTraits::AddCost, + MulCost = 4 * NumTraits::MulCost + 2 * NumTraits::AddCost + }; + + EIGEN_DEVICE_FUNC constexpr static Real epsilon() { return NumTraits::epsilon(); } + EIGEN_DEVICE_FUNC constexpr static Real dummy_precision() { return NumTraits::dummy_precision(); } + EIGEN_DEVICE_FUNC constexpr static int digits10() { return NumTraits::digits10(); } + EIGEN_DEVICE_FUNC constexpr static int max_digits10() { return NumTraits::max_digits10(); } +}; + +template +struct NumTraits > { + typedef Array ArrayType; + typedef typename NumTraits::Real RealScalar; + typedef Array Real; + typedef typename NumTraits::NonInteger NonIntegerScalar; + typedef Array NonInteger; + typedef ArrayType& Nested; + typedef typename NumTraits::Literal Literal; + + enum { + IsComplex = 
NumTraits::IsComplex, + IsInteger = NumTraits::IsInteger, + IsSigned = NumTraits::IsSigned, + RequireInitialization = 1, + ReadCost = ArrayType::SizeAtCompileTime == Dynamic + ? HugeCost + : ArrayType::SizeAtCompileTime * int(NumTraits::ReadCost), + AddCost = ArrayType::SizeAtCompileTime == Dynamic ? HugeCost + : ArrayType::SizeAtCompileTime * int(NumTraits::AddCost), + MulCost = ArrayType::SizeAtCompileTime == Dynamic ? HugeCost + : ArrayType::SizeAtCompileTime * int(NumTraits::MulCost) + }; + + EIGEN_DEVICE_FUNC constexpr static RealScalar epsilon() { return NumTraits::epsilon(); } + EIGEN_DEVICE_FUNC constexpr static RealScalar dummy_precision() { return NumTraits::dummy_precision(); } + + constexpr static int digits10() { return NumTraits::digits10(); } + constexpr static int max_digits10() { return NumTraits::max_digits10(); } +}; + +template <> +struct NumTraits : GenericNumTraits { + enum { RequireInitialization = 1, ReadCost = HugeCost, AddCost = HugeCost, MulCost = HugeCost }; + + constexpr static int digits10() { return 0; } + constexpr static int max_digits10() { return 0; } + + private: + static inline std::string epsilon(); + static inline std::string dummy_precision(); + static inline std::string lowest(); + static inline std::string highest(); + static inline std::string infinity(); + static inline std::string quiet_NaN(); +}; + +// Empty specialization for void to allow template specialization based on NumTraits::Real with T==void and SFINAE. 
+template <> +struct NumTraits {}; + +template <> +struct NumTraits : GenericNumTraits {}; + +} // end namespace Eigen + +#endif // EIGEN_NUMTRAITS_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/PartialReduxEvaluator.h b/o-voxel/third_party/eigen/Eigen/src/Core/PartialReduxEvaluator.h new file mode 100644 index 0000000000000000000000000000000000000000..1b5cbdc2c03f63e96bdb3175f9245249227a4a73 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/PartialReduxEvaluator.h @@ -0,0 +1,253 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2011-2018 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PARTIALREDUX_H +#define EIGEN_PARTIALREDUX_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +/*************************************************************************** + * + * This file provides evaluators for partial reductions. + * There are two modes: + * + * - scalar path: simply calls the respective function on the column or row. + * -> nothing special here, all the tricky part is handled by the return + * types of VectorwiseOp's members. They embed the functor calling the + * respective DenseBase's member function. + * + * - vectorized path: implements packet-wise reductions followed by + * some (optional) processing of the outcome, e.g., division by n for mean. + * + * For the vectorized path let's observe that the packet-size and outer-unrolling + * are both decided by the assignment logic. So all we have to do is to decide + * on the inner unrolling. + * + * For the unrolling, we can reuse "internal::redux_vec_unroller" from Redux.h, + * but we need to be careful to specify the correct increment. 
+ * + ***************************************************************************/ + +/* logic deciding a strategy for unrolling of vectorized paths */ +template +struct packetwise_redux_traits { + enum { + OuterSize = int(Evaluator::IsRowMajor) ? Evaluator::RowsAtCompileTime : Evaluator::ColsAtCompileTime, + Cost = OuterSize == Dynamic ? HugeCost + : OuterSize * Evaluator::CoeffReadCost + (OuterSize - 1) * functor_traits::Cost, + Unrolling = Cost <= EIGEN_UNROLLING_LIMIT ? CompleteUnrolling : NoUnrolling + }; +}; + +/* Value to be returned when size==0 , by default let's return 0 */ +template +EIGEN_DEVICE_FUNC PacketType packetwise_redux_empty_value(const Func&) { + const typename unpacket_traits::type zero(0); + return pset1(zero); +} + +/* For products the default is 1 */ +template +EIGEN_DEVICE_FUNC PacketType packetwise_redux_empty_value(const scalar_product_op&) { + return pset1(Scalar(1)); +} + +/* Perform the actual reduction */ +template ::Unrolling> +struct packetwise_redux_impl; + +/* Perform the actual reduction with unrolling */ +template +struct packetwise_redux_impl { + typedef redux_novec_unroller Base; + typedef typename Evaluator::Scalar Scalar; + + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketType run(const Evaluator& eval, const Func& func, Index /*size*/) { + return redux_vec_unroller::OuterSize>::template run(eval, + func); + } +}; + +/* Add a specialization of redux_vec_unroller for size==0 at compiletime. + * This specialization is not required for general reductions, which is + * why it is defined here. 
+ */ +template +struct redux_vec_unroller { + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketType run(const Evaluator&, const Func& f) { + return packetwise_redux_empty_value(f); + } +}; + +/* Perform the actual reduction for dynamic sizes */ +template +struct packetwise_redux_impl { + typedef typename Evaluator::Scalar Scalar; + typedef typename redux_traits::PacketType PacketScalar; + + template + EIGEN_DEVICE_FUNC static PacketType run(const Evaluator& eval, const Func& func, Index size) { + if (size == 0) return packetwise_redux_empty_value(func); + + const Index size4 = 1 + numext::round_down(size - 1, 4); + PacketType p = eval.template packetByOuterInner(0, 0); + // This loop is optimized for instruction pipelining: + // - each iteration generates two independent instructions + // - thanks to branch prediction and out-of-order execution we have independent instructions across loops + for (Index i = 1; i < size4; i += 4) + p = func.packetOp( + p, func.packetOp(func.packetOp(eval.template packetByOuterInner(i + 0, 0), + eval.template packetByOuterInner(i + 1, 0)), + func.packetOp(eval.template packetByOuterInner(i + 2, 0), + eval.template packetByOuterInner(i + 3, 0)))); + for (Index i = size4; i < size; ++i) + p = func.packetOp(p, eval.template packetByOuterInner(i, 0)); + return p; + } +}; + +template +struct packetwise_segment_redux_impl { + typedef typename Evaluator::Scalar Scalar; + typedef typename redux_traits::PacketType PacketScalar; + + template + EIGEN_DEVICE_FUNC static PacketType run(const Evaluator& eval, const Func& func, Index size, Index begin, + Index count) { + if (size == 0) return packetwise_redux_empty_value(func); + + PacketType p = eval.template packetSegmentByOuterInner(0, 0, begin, count); + for (Index i = 1; i < size; ++i) + p = func.packetOp(p, eval.template packetSegmentByOuterInner(i, 0, begin, count)); + return p; + } +}; + +template +struct evaluator > + : evaluator_base > { + typedef PartialReduxExpr XprType; + 
typedef typename internal::nested_eval::type ArgTypeNested; + typedef add_const_on_value_type_t ConstArgTypeNested; + typedef internal::remove_all_t ArgTypeNestedCleaned; + typedef typename ArgType::Scalar InputScalar; + typedef typename XprType::Scalar Scalar; + enum { + TraversalSize = Direction == int(Vertical) ? int(ArgType::RowsAtCompileTime) : int(ArgType::ColsAtCompileTime) + }; + typedef typename MemberOp::template Cost CostOpType; + enum { + CoeffReadCost = TraversalSize == Dynamic ? HugeCost + : TraversalSize == 0 + ? 1 + : int(TraversalSize) * int(evaluator::CoeffReadCost) + int(CostOpType::value), + + ArgFlags_ = evaluator::Flags, + + Vectorizable_ = bool(int(ArgFlags_) & PacketAccessBit) && bool(MemberOp::Vectorizable) && + (Direction == int(Vertical) ? bool(ArgFlags_ & RowMajorBit) : (ArgFlags_ & RowMajorBit) == 0) && + (TraversalSize != 0), + + Flags = (traits::Flags & RowMajorBit) | (evaluator::Flags & (HereditaryBits & (~RowMajorBit))) | + (Vectorizable_ ? PacketAccessBit : 0) | LinearAccessBit, + + Alignment = 0 // FIXME this will need to be improved once PartialReduxExpr is vectorized + }; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType xpr) : m_arg(xpr.nestedExpression()), m_functor(xpr.functor()) { + EIGEN_INTERNAL_CHECK_COST_VALUE(TraversalSize == Dynamic ? HugeCost + : (TraversalSize == 0 ? 1 : int(CostOpType::value))); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index i, Index j) const { + return coeff(Direction == Vertical ? j : i); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index index) const { + return m_functor(m_arg.template subVector(index)); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index i, Index j) const { + return packet(Direction == Vertical ? 
j : i); + } + + template + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC PacketType packet(Index idx) const { + static constexpr int PacketSize = internal::unpacket_traits::size; + static constexpr int PanelRows = Direction == Vertical ? ArgType::RowsAtCompileTime : PacketSize; + static constexpr int PanelCols = Direction == Vertical ? PacketSize : ArgType::ColsAtCompileTime; + using PanelType = Block; + using PanelEvaluator = typename internal::redux_evaluator; + using BinaryOp = typename MemberOp::BinaryOp; + using Impl = internal::packetwise_redux_impl; + + // FIXME + // See bug 1612, currently if PacketSize==1 (i.e. complex with 128bits registers) then the storage-order of + // panel get reversed and methods like packetByOuterInner do not make sense anymore in this context. So let's just + // by pass "vectorization" in this case: + if (PacketSize == 1) return internal::pset1(coeff(idx)); + + Index startRow = Direction == Vertical ? 0 : idx; + Index startCol = Direction == Vertical ? idx : 0; + Index numRows = Direction == Vertical ? m_arg.rows() : PacketSize; + Index numCols = Direction == Vertical ? PacketSize : m_arg.cols(); + + PanelType panel(m_arg, startRow, startCol, numRows, numCols); + PanelEvaluator panel_eval(panel); + PacketType p = Impl::template run(panel_eval, m_functor.binaryFunc(), m_arg.outerSize()); + return p; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index i, Index j, Index begin, Index count) const { + return packetSegment(Direction == Vertical ? j : i, begin, count); + } + + template + EIGEN_STRONG_INLINE EIGEN_DEVICE_FUNC PacketType packetSegment(Index idx, Index begin, Index count) const { + static constexpr int PanelRows = Direction == Vertical ? ArgType::RowsAtCompileTime : Dynamic; + static constexpr int PanelCols = Direction == Vertical ? 
Dynamic : ArgType::ColsAtCompileTime; + using PanelType = Block; + using PanelEvaluator = typename internal::redux_evaluator; + using BinaryOp = typename MemberOp::BinaryOp; + using Impl = internal::packetwise_segment_redux_impl; + + Index startRow = Direction == Vertical ? 0 : idx; + Index startCol = Direction == Vertical ? idx : 0; + Index numRows = Direction == Vertical ? m_arg.rows() : begin + count; + Index numCols = Direction == Vertical ? begin + count : m_arg.cols(); + + PanelType panel(m_arg, startRow, startCol, numRows, numCols); + PanelEvaluator panel_eval(panel); + PacketType p = Impl::template run(panel_eval, m_functor.binaryFunc(), m_arg.outerSize(), begin, count); + return p; + } + + protected: + ConstArgTypeNested m_arg; + const MemberOp m_functor; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PARTIALREDUX_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/PermutationMatrix.h b/o-voxel/third_party/eigen/Eigen/src/Core/PermutationMatrix.h new file mode 100644 index 0000000000000000000000000000000000000000..6b8fff58b9dd7f7f5cb624c01906c4ad7eb43624 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/PermutationMatrix.h @@ -0,0 +1,555 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Benoit Jacob +// Copyright (C) 2009-2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_PERMUTATIONMATRIX_H +#define EIGEN_PERMUTATIONMATRIX_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +enum PermPermProduct_t { PermPermProduct }; + +} // end namespace internal + +/** \class PermutationBase + * \ingroup Core_Module + * + * \brief Base class for permutations + * + * \tparam Derived the derived class + * + * This class is the base class for all expressions representing a permutation matrix, + * internally stored as a vector of integers. + * The convention followed here is that if \f$ \sigma \f$ is a permutation, the corresponding permutation matrix + * \f$ P_\sigma \f$ is such that if \f$ (e_1,\ldots,e_p) \f$ is the canonical basis, we have: + * \f[ P_\sigma(e_i) = e_{\sigma(i)}. \f] + * This convention ensures that for any two permutations \f$ \sigma, \tau \f$, we have: + * \f[ P_{\sigma\circ\tau} = P_\sigma P_\tau. \f] + * + * Permutation matrices are square and invertible. + * + * Notice that in addition to the member functions and operators listed here, there also are non-member + * operator* to multiply any kind of permutation object with any kind of matrix expression (MatrixBase) + * on either side. 
+ * + * \sa class PermutationMatrix, class PermutationWrapper + */ +template +class PermutationBase : public EigenBase { + typedef internal::traits Traits; + typedef EigenBase Base; + + public: +#ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename Traits::IndicesType IndicesType; + enum { + Flags = Traits::Flags, + RowsAtCompileTime = Traits::RowsAtCompileTime, + ColsAtCompileTime = Traits::ColsAtCompileTime, + MaxRowsAtCompileTime = Traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = Traits::MaxColsAtCompileTime + }; + typedef typename Traits::StorageIndex StorageIndex; + typedef Matrix + DenseMatrixType; + typedef PermutationMatrix + PlainPermutationType; + typedef PlainPermutationType PlainObject; + using Base::derived; + typedef Inverse InverseReturnType; + typedef void Scalar; +#endif + + /** Copies the other permutation into *this */ + template + Derived& operator=(const PermutationBase& other) { + indices() = other.indices(); + return derived(); + } + + /** Assignment from the Transpositions \a tr */ + template + Derived& operator=(const TranspositionsBase& tr) { + setIdentity(tr.size()); + for (Index k = size() - 1; k >= 0; --k) applyTranspositionOnTheRight(k, tr.coeff(k)); + return derived(); + } + + /** \returns the number of rows */ + inline EIGEN_DEVICE_FUNC Index rows() const { return Index(indices().size()); } + + /** \returns the number of columns */ + inline EIGEN_DEVICE_FUNC Index cols() const { return Index(indices().size()); } + + /** \returns the size of a side of the respective square matrix, i.e., the number of indices */ + inline EIGEN_DEVICE_FUNC Index size() const { return Index(indices().size()); } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + template + void evalTo(MatrixBase& other) const { + other.setZero(); + for (Index i = 0; i < rows(); ++i) other.coeffRef(indices().coeff(i), i) = typename DenseDerived::Scalar(1); + } +#endif + + /** \returns a Matrix object initialized from this permutation matrix. 
Notice that it + * is inefficient to return this Matrix object by value. For efficiency, favor using + * the Matrix constructor taking EigenBase objects. + */ + DenseMatrixType toDenseMatrix() const { return derived(); } + + /** \returns the plain matrix representation of the permutation. */ + DenseMatrixType eval() const { return toDenseMatrix(); } + + /** const version of indices(). */ + const IndicesType& indices() const { return derived().indices(); } + /** \returns a reference to the stored array representing the permutation. */ + IndicesType& indices() { return derived().indices(); } + + /** Resizes to given size. + */ + inline void resize(Index newSize) { indices().resize(newSize); } + + /** Sets *this to be the identity permutation matrix */ + void setIdentity() { + StorageIndex n = StorageIndex(size()); + for (StorageIndex i = 0; i < n; ++i) indices().coeffRef(i) = i; + } + + /** Sets *this to be the identity permutation matrix of given size. + */ + void setIdentity(Index newSize) { + resize(newSize); + setIdentity(); + } + + /** Multiplies *this by the transposition \f$(ij)\f$ on the left. + * + * \returns a reference to *this. + * + * \warning This is much slower than applyTranspositionOnTheRight(Index,Index): + * this has linear complexity and requires a lot of branching. + * + * \sa applyTranspositionOnTheRight(Index,Index) + */ + Derived& applyTranspositionOnTheLeft(Index i, Index j) { + eigen_assert(i >= 0 && j >= 0 && i < size() && j < size()); + for (Index k = 0; k < size(); ++k) { + if (indices().coeff(k) == i) + indices().coeffRef(k) = StorageIndex(j); + else if (indices().coeff(k) == j) + indices().coeffRef(k) = StorageIndex(i); + } + return derived(); + } + + /** Multiplies *this by the transposition \f$(ij)\f$ on the right. + * + * \returns a reference to *this. + * + * This is a fast operation, it only consists in swapping two indices. 
+ * + * \sa applyTranspositionOnTheLeft(Index,Index) + */ + Derived& applyTranspositionOnTheRight(Index i, Index j) { + eigen_assert(i >= 0 && j >= 0 && i < size() && j < size()); + std::swap(indices().coeffRef(i), indices().coeffRef(j)); + return derived(); + } + + /** \returns the inverse permutation matrix. + * + * \note \blank \note_try_to_help_rvo + */ + inline InverseReturnType inverse() const { return InverseReturnType(derived()); } + /** \returns the transpose permutation matrix. + * + * \note \blank \note_try_to_help_rvo + */ + inline InverseReturnType transpose() const { return InverseReturnType(derived()); } + + /**** multiplication helpers to hopefully get RVO ****/ + +#ifndef EIGEN_PARSED_BY_DOXYGEN + protected: + template + void assignTranspose(const PermutationBase& other) { + for (Index i = 0; i < rows(); ++i) indices().coeffRef(other.indices().coeff(i)) = i; + } + template + void assignProduct(const Lhs& lhs, const Rhs& rhs) { + eigen_assert(lhs.cols() == rhs.rows()); + for (Index i = 0; i < rows(); ++i) indices().coeffRef(i) = lhs.indices().coeff(rhs.indices().coeff(i)); + } +#endif + + public: + /** \returns the product permutation matrix. + * + * \note \blank \note_try_to_help_rvo + */ + template + inline PlainPermutationType operator*(const PermutationBase& other) const { + return PlainPermutationType(internal::PermPermProduct, derived(), other.derived()); + } + + /** \returns the product of a permutation with another inverse permutation. + * + * \note \blank \note_try_to_help_rvo + */ + template + inline PlainPermutationType operator*(const InverseImpl& other) const { + return PlainPermutationType(internal::PermPermProduct, *this, other.eval()); + } + + /** \returns the product of an inverse permutation with another permutation. 
+ * + * \note \blank \note_try_to_help_rvo + */ + template + friend inline PlainPermutationType operator*(const InverseImpl& other, + const PermutationBase& perm) { + return PlainPermutationType(internal::PermPermProduct, other.eval(), perm); + } + + /** \returns the determinant of the permutation matrix, which is either 1 or -1 depending on the parity of the + * permutation. + * + * This function is an O(\c n) procedure allocating a buffer of \c n booleans. + */ + Index determinant() const { + Index res = 1; + Index n = size(); + Matrix mask(n); + mask.fill(false); + Index r = 0; + while (r < n) { + // search for the next seed + while (r < n && mask[r]) r++; + if (r >= n) break; + // we got one, let's follow it until we are back to the seed + Index k0 = r++; + mask.coeffRef(k0) = true; + for (Index k = indices().coeff(k0); k != k0; k = indices().coeff(k)) { + mask.coeffRef(k) = true; + res = -res; + } + } + return res; + } + + protected: +}; + +namespace internal { +template +struct traits > + : traits< + Matrix > { + typedef PermutationStorage StorageKind; + typedef Matrix IndicesType; + typedef StorageIndex_ StorageIndex; + typedef void Scalar; +}; +} // namespace internal + +/** \class PermutationMatrix + * \ingroup Core_Module + * + * \brief Permutation matrix + * + * \tparam SizeAtCompileTime the number of rows/cols, or Dynamic + * \tparam MaxSizeAtCompileTime the maximum number of rows/cols, or Dynamic. This optional parameter defaults to + * SizeAtCompileTime. Most of the time, you should not have to specify it. + * \tparam StorageIndex_ the integer type of the indices + * + * This class represents a permutation matrix, internally stored as a vector of integers. 
+ * + * \sa class PermutationBase, class PermutationWrapper, class DiagonalMatrix + */ +template +class PermutationMatrix + : public PermutationBase > { + typedef PermutationBase Base; + typedef internal::traits Traits; + + public: + typedef const PermutationMatrix& Nested; + +#ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename Traits::IndicesType IndicesType; + typedef typename Traits::StorageIndex StorageIndex; +#endif + + inline PermutationMatrix() {} + + /** Constructs an uninitialized permutation matrix of given size. + */ + explicit inline PermutationMatrix(Index size) : m_indices(size) { + eigen_internal_assert(size <= NumTraits::highest()); + } + + /** Copy constructor. */ + template + inline PermutationMatrix(const PermutationBase& other) : m_indices(other.indices()) {} + + /** Generic constructor from expression of the indices. The indices + * array has the meaning that the permutation sends each integer i to indices[i]. + * + * \warning It is your responsibility to check that the indices array that you pass actually + * describes a permutation, i.e., each value between 0 and n-1 occurs exactly once, where n is the + * array's size. + */ + template + explicit inline PermutationMatrix(const MatrixBase& indices) : m_indices(indices) {} + + /** Convert the Transpositions \a tr to a permutation matrix */ + template + explicit PermutationMatrix(const TranspositionsBase& tr) : m_indices(tr.size()) { + *this = tr; + } + + /** Copies the other permutation into *this */ + template + PermutationMatrix& operator=(const PermutationBase& other) { + m_indices = other.indices(); + return *this; + } + + /** Assignment from the Transpositions \a tr */ + template + PermutationMatrix& operator=(const TranspositionsBase& tr) { + return Base::operator=(tr.derived()); + } + + /** const version of indices(). */ + const IndicesType& indices() const { return m_indices; } + /** \returns a reference to the stored array representing the permutation. 
*/ + IndicesType& indices() { return m_indices; } + + /**** multiplication helpers to hopefully get RVO ****/ + +#ifndef EIGEN_PARSED_BY_DOXYGEN + template + PermutationMatrix(const InverseImpl& other) + : m_indices(other.derived().nestedExpression().size()) { + eigen_internal_assert(m_indices.size() <= NumTraits::highest()); + StorageIndex end = StorageIndex(m_indices.size()); + for (StorageIndex i = 0; i < end; ++i) + m_indices.coeffRef(other.derived().nestedExpression().indices().coeff(i)) = i; + } + template + PermutationMatrix(internal::PermPermProduct_t, const Lhs& lhs, const Rhs& rhs) : m_indices(lhs.indices().size()) { + Base::assignProduct(lhs, rhs); + } +#endif + + protected: + IndicesType m_indices; +}; + +namespace internal { +template +struct traits, PacketAccess_> > + : traits< + Matrix > { + typedef PermutationStorage StorageKind; + typedef Map, PacketAccess_> IndicesType; + typedef StorageIndex_ StorageIndex; + typedef void Scalar; +}; +} // namespace internal + +template +class Map, PacketAccess_> + : public PermutationBase< + Map, PacketAccess_> > { + typedef PermutationBase Base; + typedef internal::traits Traits; + + public: +#ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename Traits::IndicesType IndicesType; + typedef typename IndicesType::Scalar StorageIndex; +#endif + + inline Map(const StorageIndex* indicesPtr) : m_indices(indicesPtr) {} + + inline Map(const StorageIndex* indicesPtr, Index size) : m_indices(indicesPtr, size) {} + + /** Copies the other permutation into *this */ + template + Map& operator=(const PermutationBase& other) { + return Base::operator=(other.derived()); + } + + /** Assignment from the Transpositions \a tr */ + template + Map& operator=(const TranspositionsBase& tr) { + return Base::operator=(tr.derived()); + } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. 
+ */ + Map& operator=(const Map& other) { + m_indices = other.m_indices; + return *this; + } +#endif + + /** const version of indices(). */ + const IndicesType& indices() const { return m_indices; } + /** \returns a reference to the stored array representing the permutation. */ + IndicesType& indices() { return m_indices; } + + protected: + IndicesType m_indices; +}; + +template +class TranspositionsWrapper; +namespace internal { +template +struct traits > { + typedef PermutationStorage StorageKind; + typedef void Scalar; + typedef typename IndicesType_::Scalar StorageIndex; + typedef IndicesType_ IndicesType; + enum { + RowsAtCompileTime = IndicesType_::SizeAtCompileTime, + ColsAtCompileTime = IndicesType_::SizeAtCompileTime, + MaxRowsAtCompileTime = IndicesType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = IndicesType::MaxSizeAtCompileTime, + Flags = 0 + }; +}; +} // namespace internal + +/** \class PermutationWrapper + * \ingroup Core_Module + * + * \brief Class to view a vector of integers as a permutation matrix + * + * \tparam IndicesType_ the type of the vector of integer (can be any compatible expression) + * + * This class allows to view any vector expression of integers as a permutation matrix. + * + * \sa class PermutationBase, class PermutationMatrix + */ +template +class PermutationWrapper : public PermutationBase > { + typedef PermutationBase Base; + typedef internal::traits Traits; + + public: +#ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename Traits::IndicesType IndicesType; +#endif + + inline PermutationWrapper(const IndicesType& indices) : m_indices(indices) {} + + /** const version of indices(). */ + const internal::remove_all_t& indices() const { return m_indices; } + + protected: + typename IndicesType::Nested m_indices; +}; + +/** \returns the matrix with the permutation applied to the columns. 
+ */ +template +EIGEN_DEVICE_FUNC const Product operator*( + const MatrixBase& matrix, const PermutationBase& permutation) { + return Product(matrix.derived(), permutation.derived()); +} + +/** \returns the matrix with the permutation applied to the rows. + */ +template +EIGEN_DEVICE_FUNC const Product operator*( + const PermutationBase& permutation, const MatrixBase& matrix) { + return Product(permutation.derived(), matrix.derived()); +} + +template +class InverseImpl : public EigenBase > { + typedef typename PermutationType::PlainPermutationType PlainPermutationType; + typedef internal::traits PermTraits; + + protected: + InverseImpl() {} + + public: + typedef Inverse InverseType; + using EigenBase >::derived; + +#ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename PermutationType::DenseMatrixType DenseMatrixType; + enum { + RowsAtCompileTime = PermTraits::RowsAtCompileTime, + ColsAtCompileTime = PermTraits::ColsAtCompileTime, + MaxRowsAtCompileTime = PermTraits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = PermTraits::MaxColsAtCompileTime + }; +#endif + +#ifndef EIGEN_PARSED_BY_DOXYGEN + template + void evalTo(MatrixBase& other) const { + other.setZero(); + for (Index i = 0; i < derived().rows(); ++i) + other.coeffRef(i, derived().nestedExpression().indices().coeff(i)) = typename DenseDerived::Scalar(1); + } +#endif + + /** \return the equivalent permutation matrix */ + PlainPermutationType eval() const { return derived(); } + + DenseMatrixType toDenseMatrix() const { return derived(); } + + /** \returns the matrix with the inverse permutation applied to the columns. + */ + template + friend const Product operator*(const MatrixBase& matrix, + const InverseType& trPerm) { + return Product(matrix.derived(), trPerm.derived()); + } + + /** \returns the matrix with the inverse permutation applied to the rows. 
+ */ + template + const Product operator*(const MatrixBase& matrix) const { + return Product(derived(), matrix.derived()); + } +}; + +template +const PermutationWrapper MatrixBase::asPermutation() const { + return derived(); +} + +namespace internal { + +template <> +struct AssignmentKind { + typedef EigenBase2EigenBase Kind; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PERMUTATIONMATRIX_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/PlainObjectBase.h b/o-voxel/third_party/eigen/Eigen/src/Core/PlainObjectBase.h new file mode 100644 index 0000000000000000000000000000000000000000..f7464d821e74a8041e911c6d7faf3a6e5de98516 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/PlainObjectBase.h @@ -0,0 +1,1014 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_DENSESTORAGEBASE_H +#define EIGEN_DENSESTORAGEBASE_H + +#if defined(EIGEN_INITIALIZE_MATRICES_BY_ZERO) +#define EIGEN_INITIALIZE_COEFFS +#define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED \ + for (Index i = 0; i < base().size(); ++i) coeffRef(i) = Scalar(0); +#elif defined(EIGEN_INITIALIZE_MATRICES_BY_NAN) +#define EIGEN_INITIALIZE_COEFFS +#define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED \ + for (Index i = 0; i < base().size(); ++i) coeffRef(i) = std::numeric_limits::quiet_NaN(); +#else +#undef EIGEN_INITIALIZE_COEFFS +#define EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED +#endif + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +#ifndef EIGEN_NO_DEBUG +template +struct check_rows_cols_for_overflow { + EIGEN_STATIC_ASSERT(MaxRowsAtCompileTime* MaxColsAtCompileTime == MaxSizeAtCompileTime, + YOU MADE A PROGRAMMING MISTAKE) + template + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index, Index) {} +}; + +template +struct check_rows_cols_for_overflow { + template + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index, Index cols) { + constexpr Index MaxIndex = NumTraits::highest(); + bool error = cols > (MaxIndex / MaxRowsAtCompileTime); + if (error) throw_std_bad_alloc(); + } +}; + +template +struct check_rows_cols_for_overflow { + template + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index rows, Index) { + constexpr Index MaxIndex = NumTraits::highest(); + bool error = rows > (MaxIndex / MaxColsAtCompileTime); + if (error) throw_std_bad_alloc(); + } +}; + +template <> +struct check_rows_cols_for_overflow { + template + EIGEN_DEVICE_FUNC static EIGEN_ALWAYS_INLINE constexpr void run(Index rows, Index cols) { + constexpr Index MaxIndex = NumTraits::highest(); + bool error = cols == 0 ? 
false : (rows > (MaxIndex / cols)); + if (error) throw_std_bad_alloc(); + } +}; +#endif + +template +struct conservative_resize_like_impl; + +template +struct matrix_swap_impl; + +} // end namespace internal + +/** \class PlainObjectBase + * \ingroup Core_Module + * \brief %Dense storage base class for matrices and arrays. + * + * This class can be extended with the help of the plugin mechanism described on the page + * \ref TopicCustomizing_Plugins by defining the preprocessor symbol \c EIGEN_PLAINOBJECTBASE_PLUGIN. + * + * \tparam Derived is the derived type, e.g., a Matrix or Array + * + * \sa \ref TopicClassHierarchy + */ +template +class PlainObjectBase : public internal::dense_xpr_base::type { + public: + enum { Options = internal::traits::Options }; + typedef typename internal::dense_xpr_base::type Base; + + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::Scalar Scalar; + + typedef typename internal::packet_traits::type PacketScalar; + typedef typename NumTraits::Real RealScalar; + typedef Derived DenseType; + + using Base::ColsAtCompileTime; + using Base::Flags; + using Base::IsVectorAtCompileTime; + using Base::MaxColsAtCompileTime; + using Base::MaxRowsAtCompileTime; + using Base::MaxSizeAtCompileTime; + using Base::RowsAtCompileTime; + using Base::SizeAtCompileTime; + + typedef Eigen::Map MapType; + typedef const Eigen::Map ConstMapType; + typedef Eigen::Map AlignedMapType; + typedef const Eigen::Map ConstAlignedMapType; + template + struct StridedMapType { + typedef Eigen::Map type; + }; + template + struct StridedConstMapType { + typedef Eigen::Map type; + }; + template + struct StridedAlignedMapType { + typedef Eigen::Map type; + }; + template + struct StridedConstAlignedMapType { + typedef Eigen::Map type; + }; + + protected: + DenseStorage m_storage; + + public: + enum { NeedsToAlign = (SizeAtCompileTime != Dynamic) && (internal::traits::Alignment > 0) }; + 
EIGEN_MAKE_ALIGNED_OPERATOR_NEW_IF(NeedsToAlign) + + EIGEN_STATIC_ASSERT(internal::check_implication(MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1, + (int(Options) & RowMajor) == RowMajor), + INVALID_MATRIX_TEMPLATE_PARAMETERS) + EIGEN_STATIC_ASSERT(internal::check_implication(MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1, + (int(Options) & RowMajor) == 0), + INVALID_MATRIX_TEMPLATE_PARAMETERS) + EIGEN_STATIC_ASSERT((RowsAtCompileTime == Dynamic) || (RowsAtCompileTime >= 0), INVALID_MATRIX_TEMPLATE_PARAMETERS) + EIGEN_STATIC_ASSERT((ColsAtCompileTime == Dynamic) || (ColsAtCompileTime >= 0), INVALID_MATRIX_TEMPLATE_PARAMETERS) + EIGEN_STATIC_ASSERT((MaxRowsAtCompileTime == Dynamic) || (MaxRowsAtCompileTime >= 0), + INVALID_MATRIX_TEMPLATE_PARAMETERS) + EIGEN_STATIC_ASSERT((MaxColsAtCompileTime == Dynamic) || (MaxColsAtCompileTime >= 0), + INVALID_MATRIX_TEMPLATE_PARAMETERS) + EIGEN_STATIC_ASSERT((MaxRowsAtCompileTime == RowsAtCompileTime || RowsAtCompileTime == Dynamic), + INVALID_MATRIX_TEMPLATE_PARAMETERS) + EIGEN_STATIC_ASSERT((MaxColsAtCompileTime == ColsAtCompileTime || ColsAtCompileTime == Dynamic), + INVALID_MATRIX_TEMPLATE_PARAMETERS) + EIGEN_STATIC_ASSERT(((Options & (DontAlign | RowMajor)) == Options), INVALID_MATRIX_TEMPLATE_PARAMETERS) + + EIGEN_DEVICE_FUNC Base& base() { return *static_cast(this); } + EIGEN_DEVICE_FUNC const Base& base() const { return *static_cast(this); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const noexcept { return m_storage.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept { return m_storage.cols(); } + + /** This is an overloaded version of DenseCoeffsBase::coeff(Index,Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeff(Index) const for details. 
*/ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeff(Index rowId, Index colId) const { + if (Flags & RowMajorBit) + return m_storage.data()[colId + rowId * m_storage.cols()]; + else // column-major + return m_storage.data()[rowId + colId * m_storage.rows()]; + } + + /** This is an overloaded version of DenseCoeffsBase::coeff(Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeff(Index) const for details. */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeff(Index index) const { + return m_storage.data()[index]; + } + + /** This is an overloaded version of DenseCoeffsBase::coeffRef(Index,Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeffRef(Index,Index) const for details. */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index rowId, Index colId) { + if (Flags & RowMajorBit) + return m_storage.data()[colId + rowId * m_storage.cols()]; + else // column-major + return m_storage.data()[rowId + colId * m_storage.rows()]; + } + + /** This is an overloaded version of DenseCoeffsBase::coeffRef(Index) const + * provided to by-pass the creation of an evaluator of the expression, thus saving compilation efforts. + * + * See DenseCoeffsBase::coeffRef(Index) const for details. */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Scalar& coeffRef(Index index) { return m_storage.data()[index]; } + + /** This is the const version of coeffRef(Index,Index) which is thus synonym of coeff(Index,Index). + * It is provided for convenience. 
*/ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeffRef(Index rowId, Index colId) const { + if (Flags & RowMajorBit) + return m_storage.data()[colId + rowId * m_storage.cols()]; + else // column-major + return m_storage.data()[rowId + colId * m_storage.rows()]; + } + + /** This is the const version of coeffRef(Index) which is thus synonym of coeff(Index). + * It is provided for convenience. */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar& coeffRef(Index index) const { + return m_storage.data()[index]; + } + + /** \internal */ + template + EIGEN_STRONG_INLINE PacketScalar packet(Index rowId, Index colId) const { + return internal::ploadt( + m_storage.data() + (Flags & RowMajorBit ? colId + rowId * m_storage.cols() : rowId + colId * m_storage.rows())); + } + + /** \internal */ + template + EIGEN_STRONG_INLINE PacketScalar packet(Index index) const { + return internal::ploadt(m_storage.data() + index); + } + + /** \internal */ + template + EIGEN_STRONG_INLINE void writePacket(Index rowId, Index colId, const PacketScalar& val) { + internal::pstoret( + m_storage.data() + (Flags & RowMajorBit ? colId + rowId * m_storage.cols() : rowId + colId * m_storage.rows()), + val); + } + + /** \internal */ + template + EIGEN_STRONG_INLINE void writePacket(Index index, const PacketScalar& val) { + internal::pstoret(m_storage.data() + index, val); + } + + /** \returns a const pointer to the data array of this matrix */ + EIGEN_DEVICE_FUNC constexpr const Scalar* data() const { return m_storage.data(); } + + /** \returns a pointer to the data array of this matrix */ + EIGEN_DEVICE_FUNC constexpr Scalar* data() { return m_storage.data(); } + + /** Resizes \c *this to a \a rows x \a cols matrix. + * + * This method is intended for dynamic-size matrices, although it is legal to call it on any + * matrix as long as fixed dimensions are left unchanged. 
If you only want to change the number + * of rows and/or of columns, you can use resize(NoChange_t, Index), resize(Index, NoChange_t). + * + * If the current number of coefficients of \c *this exactly matches the + * product \a rows * \a cols, then no memory allocation is performed and + * the current values are left unchanged. In all other cases, including + * shrinking, the data is reallocated and all previous values are lost. + * + * Example: \include Matrix_resize_int_int.cpp + * Output: \verbinclude Matrix_resize_int_int.out + * + * \sa resize(Index) for vectors, resize(NoChange_t, Index), resize(Index, NoChange_t) + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr void resize(Index rows, Index cols) { + eigen_assert(internal::check_implication(RowsAtCompileTime != Dynamic, rows == RowsAtCompileTime) && + internal::check_implication(ColsAtCompileTime != Dynamic, cols == ColsAtCompileTime) && + internal::check_implication(RowsAtCompileTime == Dynamic && MaxRowsAtCompileTime != Dynamic, + rows <= MaxRowsAtCompileTime) && + internal::check_implication(ColsAtCompileTime == Dynamic && MaxColsAtCompileTime != Dynamic, + cols <= MaxColsAtCompileTime) && + rows >= 0 && cols >= 0 && "Invalid sizes when resizing a matrix or array."); +#ifndef EIGEN_NO_DEBUG + internal::check_rows_cols_for_overflow::run(rows, + cols); +#endif +#ifdef EIGEN_INITIALIZE_COEFFS + Index size = rows * cols; + bool size_changed = size != this->size(); + m_storage.resize(size, rows, cols); + if (size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED +#else + m_storage.resize(rows * cols, rows, cols); +#endif + } + + /** Resizes \c *this to a vector of length \a size + * + * \only_for_vectors. This method does not work for + * partially dynamic matrices when the static dimension is anything other + * than 1. For example it will not work with Matrix. 
+ * + * Example: \include Matrix_resize_int.cpp + * Output: \verbinclude Matrix_resize_int.out + * + * \sa resize(Index,Index), resize(NoChange_t, Index), resize(Index, NoChange_t) + */ + EIGEN_DEVICE_FUNC constexpr void resize(Index size) { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(PlainObjectBase) + eigen_assert(((SizeAtCompileTime == Dynamic && (MaxSizeAtCompileTime == Dynamic || size <= MaxSizeAtCompileTime)) || + SizeAtCompileTime == size) && + size >= 0); +#ifdef EIGEN_INITIALIZE_COEFFS + bool size_changed = size != this->size(); +#endif + if (RowsAtCompileTime == 1) + m_storage.resize(size, 1, size); + else + m_storage.resize(size, size, 1); +#ifdef EIGEN_INITIALIZE_COEFFS + if (size_changed) EIGEN_INITIALIZE_COEFFS_IF_THAT_OPTION_IS_ENABLED +#endif + } + + /** Resizes the matrix, changing only the number of columns. For the parameter of type NoChange_t, just pass the + * special value \c NoChange as in the example below. + * + * Example: \include Matrix_resize_NoChange_int.cpp + * Output: \verbinclude Matrix_resize_NoChange_int.out + * + * \sa resize(Index,Index) + */ + EIGEN_DEVICE_FUNC constexpr void resize(NoChange_t, Index cols) { resize(rows(), cols); } + + /** Resizes the matrix, changing only the number of rows. For the parameter of type NoChange_t, just pass the special + * value \c NoChange as in the example below. + * + * Example: \include Matrix_resize_int_NoChange.cpp + * Output: \verbinclude Matrix_resize_int_NoChange.out + * + * \sa resize(Index,Index) + */ + EIGEN_DEVICE_FUNC constexpr void resize(Index rows, NoChange_t) { resize(rows, cols()); } + + /** Resizes \c *this to have the same dimensions as \a other. + * Takes care of doing all the checking that's needed. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. 
+ */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resizeLike(const EigenBase& _other) { + const OtherDerived& other = _other.derived(); +#ifndef EIGEN_NO_DEBUG + internal::check_rows_cols_for_overflow::run( + other.rows(), other.cols()); +#endif + const Index othersize = other.rows() * other.cols(); + if (RowsAtCompileTime == 1) { + eigen_assert(other.rows() == 1 || other.cols() == 1); + resize(1, othersize); + } else if (ColsAtCompileTime == 1) { + eigen_assert(other.rows() == 1 || other.cols() == 1); + resize(othersize, 1); + } else + resize(other.rows(), other.cols()); + } + + /** Resizes the matrix to \a rows x \a cols while leaving old values untouched. + * + * The method is intended for matrices of dynamic size. If you only want to change the number + * of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or + * conservativeResize(Index, NoChange_t). + * + * Matrices are resized relative to the top-left element. In case values need to be + * appended to the matrix they will be uninitialized. + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void conservativeResize(Index rows, Index cols) { + internal::conservative_resize_like_impl::run(*this, rows, cols); + } + + /** Resizes the matrix to \a rows x \a cols while leaving old values untouched. + * + * As opposed to conservativeResize(Index rows, Index cols), this version leaves + * the number of columns unchanged. + * + * In case the matrix is growing, new rows will be uninitialized. + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void conservativeResize(Index rows, NoChange_t) { + // Note: see the comment in conservativeResize(Index,Index) + conservativeResize(rows, cols()); + } + + /** Resizes the matrix to \a rows x \a cols while leaving old values untouched. + * + * As opposed to conservativeResize(Index rows, Index cols), this version leaves + * the number of rows unchanged. + * + * In case the matrix is growing, new columns will be uninitialized. 
+ */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols) { + // Note: see the comment in conservativeResize(Index,Index) + conservativeResize(rows(), cols); + } + + /** Resizes the vector to \a size while retaining old values. + * + * \only_for_vectors. This method does not work for + * partially dynamic matrices when the static dimension is anything other + * than 1. For example it will not work with Matrix. + * + * When values are appended, they will be uninitialized. + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void conservativeResize(Index size) { + internal::conservative_resize_like_impl::run(*this, size); + } + + /** Resizes the matrix to \a rows x \a cols of \c other, while leaving old values untouched. + * + * The method is intended for matrices of dynamic size. If you only want to change the number + * of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or + * conservativeResize(Index, NoChange_t). + * + * Matrices are resized relative to the top-left element. In case values need to be + * appended to the matrix they will copied from \c other. + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase& other) { + internal::conservative_resize_like_impl::run(*this, other); + } + + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. 
+ */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void conservativeResize(NoChange_t, Index cols) { + // Note: see the comment in conservativeResize(Index,Index) + conservativeResize(rows(), cols); + } + + /** Resizes the vector to \a size while retaining old values. + * + * \only_for_vectors. This method does not work for + * partially dynamic matrices when the static dimension is anything other + * than 1. For example it will not work with Matrix. + * + * When values are appended, they will be uninitialized. + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void conservativeResize(Index size) { + internal::conservative_resize_like_impl::run(*this, size); + } + + /** Resizes the matrix to \a rows x \a cols of \c other, while leaving old values untouched. + * + * The method is intended for matrices of dynamic size. If you only want to change the number + * of rows and/or of columns, you can use conservativeResize(NoChange_t, Index) or + * conservativeResize(Index, NoChange_t). + * + * Matrices are resized relative to the top-left element. In case values need to be + * appended to the matrix they will be copied from \c other. + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void conservativeResizeLike(const DenseBase& other) { + internal::conservative_resize_like_impl::run(*this, other); + } + + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. 
args) + : m_storage() { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, sizeof...(args) + 4); + m_storage.data()[0] = a0; + m_storage.data()[1] = a1; + m_storage.data()[2] = a2; + m_storage.data()[3] = a3; + Index i = 4; + auto x = {(m_storage.data()[i++] = args, 0)...}; + static_cast(x); + } + + /** \brief Constructs a Matrix or Array and initializes it by elements given by an initializer list of initializer + * lists + */ + EIGEN_DEVICE_FUNC explicit constexpr EIGEN_STRONG_INLINE PlainObjectBase( + const std::initializer_list>& list) + : m_storage() { + size_t list_size = 0; + if (list.begin() != list.end()) { + list_size = list.begin()->size(); + } + + // This is to allow syntax like VectorXi {{1, 2, 3, 4}} + if (ColsAtCompileTime == 1 && list.size() == 1) { + eigen_assert(list_size == static_cast(RowsAtCompileTime) || RowsAtCompileTime == Dynamic); + resize(list_size, ColsAtCompileTime); + if (list.begin()->begin() != nullptr) { + Index index = 0; + for (const Scalar& e : *list.begin()) { + coeffRef(index++) = e; + } + } + } else { + eigen_assert(list.size() == static_cast(RowsAtCompileTime) || RowsAtCompileTime == Dynamic); + eigen_assert(list_size == static_cast(ColsAtCompileTime) || ColsAtCompileTime == Dynamic); + resize(list.size(), list_size); + + Index row_index = 0; + for (const std::initializer_list& row : list) { + eigen_assert(list_size == row.size()); + Index col_index = 0; + for (const Scalar& e : row) { + coeffRef(row_index, col_index) = e; + ++col_index; + } + ++row_index; + } + } + } + + /** \sa PlainObjectBase::operator=(const EigenBase&) */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const DenseBase& other) : m_storage() { + resizeLike(other); + _set_noalias(other); + } + + /** \sa PlainObjectBase::operator=(const EigenBase&) */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const EigenBase& other) : m_storage() { + resizeLike(other); + *this = other.derived(); + } + /** \brief Copy 
constructor with in-place evaluation */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PlainObjectBase(const ReturnByValue& other) { + // FIXME this does not automatically transpose vectors if necessary + resize(other.rows(), other.cols()); + other.evalTo(this->derived()); + } + + public: + /** \brief Copies the generic expression \a other into *this. + * \copydetails DenseBase::operator=(const EigenBase &other) + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& operator=(const EigenBase& other) { + _resize_to_match(other); + Base::operator=(other.derived()); + return this->derived(); + } + + /** \name Map + * These are convenience functions returning Map objects. The Map() static functions return unaligned Map objects, + * while the AlignedMap() functions return aligned Map objects and thus should be called only with 16-byte-aligned + * \a data pointers. + * + * Here is an example using strides: + * \include Matrix_Map_stride.cpp + * Output: \verbinclude Matrix_Map_stride.out + * + * \see class Map + */ + ///@{ + static inline ConstMapType Map(const Scalar* data) { return ConstMapType(data); } + static inline MapType Map(Scalar* data) { return MapType(data); } + static inline ConstMapType Map(const Scalar* data, Index size) { return ConstMapType(data, size); } + static inline MapType Map(Scalar* data, Index size) { return MapType(data, size); } + static inline ConstMapType Map(const Scalar* data, Index rows, Index cols) { return ConstMapType(data, rows, cols); } + static inline MapType Map(Scalar* data, Index rows, Index cols) { return MapType(data, rows, cols); } + + static inline ConstAlignedMapType MapAligned(const Scalar* data) { return ConstAlignedMapType(data); } + static inline AlignedMapType MapAligned(Scalar* data) { return AlignedMapType(data); } + static inline ConstAlignedMapType MapAligned(const Scalar* data, Index size) { + return ConstAlignedMapType(data, size); + } + static inline AlignedMapType MapAligned(Scalar* data, Index 
size) { return AlignedMapType(data, size); } + static inline ConstAlignedMapType MapAligned(const Scalar* data, Index rows, Index cols) { + return ConstAlignedMapType(data, rows, cols); + } + static inline AlignedMapType MapAligned(Scalar* data, Index rows, Index cols) { + return AlignedMapType(data, rows, cols); + } + + template + static inline typename StridedConstMapType>::type Map(const Scalar* data, + const Stride& stride) { + return typename StridedConstMapType>::type(data, stride); + } + template + static inline typename StridedMapType>::type Map(Scalar* data, + const Stride& stride) { + return typename StridedMapType>::type(data, stride); + } + template + static inline typename StridedConstMapType>::type Map(const Scalar* data, Index size, + const Stride& stride) { + return typename StridedConstMapType>::type(data, size, stride); + } + template + static inline typename StridedMapType>::type Map(Scalar* data, Index size, + const Stride& stride) { + return typename StridedMapType>::type(data, size, stride); + } + template + static inline typename StridedConstMapType>::type Map(const Scalar* data, Index rows, Index cols, + const Stride& stride) { + return typename StridedConstMapType>::type(data, rows, cols, stride); + } + template + static inline typename StridedMapType>::type Map(Scalar* data, Index rows, Index cols, + const Stride& stride) { + return typename StridedMapType>::type(data, rows, cols, stride); + } + + template + static inline typename StridedConstAlignedMapType>::type MapAligned( + const Scalar* data, const Stride& stride) { + return typename StridedConstAlignedMapType>::type(data, stride); + } + template + static inline typename StridedAlignedMapType>::type MapAligned( + Scalar* data, const Stride& stride) { + return typename StridedAlignedMapType>::type(data, stride); + } + template + static inline typename StridedConstAlignedMapType>::type MapAligned( + const Scalar* data, Index size, const Stride& stride) { + return typename 
StridedConstAlignedMapType>::type(data, size, stride); + } + template + static inline typename StridedAlignedMapType>::type MapAligned( + Scalar* data, Index size, const Stride& stride) { + return typename StridedAlignedMapType>::type(data, size, stride); + } + template + static inline typename StridedConstAlignedMapType>::type MapAligned( + const Scalar* data, Index rows, Index cols, const Stride& stride) { + return typename StridedConstAlignedMapType>::type(data, rows, cols, stride); + } + template + static inline typename StridedAlignedMapType>::type MapAligned( + Scalar* data, Index rows, Index cols, const Stride& stride) { + return typename StridedAlignedMapType>::type(data, rows, cols, stride); + } + ///@} + + using Base::setConstant; + EIGEN_DEVICE_FUNC Derived& setConstant(Index size, const Scalar& val); + EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, Index cols, const Scalar& val); + EIGEN_DEVICE_FUNC Derived& setConstant(NoChange_t, Index cols, const Scalar& val); + EIGEN_DEVICE_FUNC Derived& setConstant(Index rows, NoChange_t, const Scalar& val); + + using Base::setZero; + EIGEN_DEVICE_FUNC Derived& setZero(Index size); + EIGEN_DEVICE_FUNC Derived& setZero(Index rows, Index cols); + EIGEN_DEVICE_FUNC Derived& setZero(NoChange_t, Index cols); + EIGEN_DEVICE_FUNC Derived& setZero(Index rows, NoChange_t); + + using Base::setOnes; + EIGEN_DEVICE_FUNC Derived& setOnes(Index size); + EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, Index cols); + EIGEN_DEVICE_FUNC Derived& setOnes(NoChange_t, Index cols); + EIGEN_DEVICE_FUNC Derived& setOnes(Index rows, NoChange_t); + + using Base::setRandom; + Derived& setRandom(Index size); + Derived& setRandom(Index rows, Index cols); + Derived& setRandom(NoChange_t, Index cols); + Derived& setRandom(Index rows, NoChange_t); + +#ifdef EIGEN_PLAINOBJECTBASE_PLUGIN +#include EIGEN_PLAINOBJECTBASE_PLUGIN +#endif + + protected: + /** \internal Resizes *this in preparation for assigning \a other to it. 
+ * Takes care of doing all the checking that's needed. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _resize_to_match(const EigenBase& other) { +#ifdef EIGEN_NO_AUTOMATIC_RESIZING + eigen_assert((this->size() == 0 || (IsVectorAtCompileTime ? (this->size() == other.size()) + : (rows() == other.rows() && cols() == other.cols()))) && + "Size mismatch. Automatic resizing is disabled because EIGEN_NO_AUTOMATIC_RESIZING is defined"); + EIGEN_ONLY_USED_FOR_DEBUG(other); +#else + resizeLike(other); +#endif + } + + /** + * \brief Copies the value of the expression \a other into \c *this with automatic resizing. + * + * *this might be resized to match the dimensions of \a other. If *this was a null matrix (not already initialized), + * it will be initialized. + * + * Note that copying a row-vector into a vector (and conversely) is allowed. + * The resizing, if any, is then done in the appropriate way so that row-vectors + * remain row-vectors and vectors remain vectors. + * + * \sa operator=(const MatrixBase&), _set_noalias() + * + * \internal + */ + // aliasing is dealt once in internal::call_assignment + // so at this stage we have to assume aliasing... and resising has to be done later. + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Derived& _set(const DenseBase& other) { + internal::call_assignment(this->derived(), other.derived()); + return this->derived(); + } + + /** \internal Like _set() but additionally makes the assumption that no aliasing effect can happen (which + * is the case when creating a new matrix) so one can enforce lazy evaluation. 
+ * + * \sa operator=(const MatrixBase&), _set() + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Derived& _set_noalias(const DenseBase& other) { + // I don't think we need this resize call since the lazyAssign will anyways resize + // and lazyAssign will be called by the assign selector. + //_resize_to_match(other); + // the 'false' below means to enforce lazy evaluation. We don't use lazyAssign() because + // it wouldn't allow to copy a row-vector into a column-vector. + internal::call_assignment_no_alias(this->derived(), other.derived(), + internal::assign_op()); + return this->derived(); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init2(Index rows, Index cols, + std::enable_if_t* = 0) { + EIGEN_STATIC_ASSERT(internal::is_valid_index_type::value && internal::is_valid_index_type::value, + T0 AND T1 MUST BE INTEGER TYPES) + resize(rows, cols); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init2(const T0& val0, const T1& val1, + std::enable_if_t* = 0) { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) + m_storage.data()[0] = Scalar(val0); + m_storage.data()[1] = Scalar(val1); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init2( + const Index& val0, const Index& val1, + std::enable_if_t<(!internal::is_same::value) && (internal::is_same::value) && + (internal::is_same::value) && Base::SizeAtCompileTime == 2, + T1>* = 0) { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 2) + m_storage.data()[0] = Scalar(val0); + m_storage.data()[1] = Scalar(val1); + } + + // The argument is convertible to the Index type and we either have a non 1x1 Matrix, or a dynamic-sized Array, + // then the argument is meant to be the size of the object. 
+ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1( + Index size, + std::enable_if_t<(Base::SizeAtCompileTime != 1 || !internal::is_convertible::value) && + ((!internal::is_same::XprKind, ArrayXpr>::value || + Base::SizeAtCompileTime == Dynamic)), + T>* = 0) { + // NOTE MSVC 2008 complains if we directly put bool(NumTraits::IsInteger) as the EIGEN_STATIC_ASSERT argument. + const bool is_integer_alike = internal::is_valid_index_type::value; + EIGEN_UNUSED_VARIABLE(is_integer_alike); + EIGEN_STATIC_ASSERT(is_integer_alike, FLOATING_POINT_ARGUMENT_PASSED__INTEGER_WAS_EXPECTED) + resize(size); + } + + // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar + // type can be implicitly converted) + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1( + const Scalar& val0, + std::enable_if_t::value, T>* = 0) { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) + m_storage.data()[0] = val0; + } + + // We have a 1x1 matrix/array => the argument is interpreted as the value of the unique coefficient (case where scalar + // type match the index type) + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1( + const Index& val0, + std::enable_if_t<(!internal::is_same::value) && (internal::is_same::value) && + Base::SizeAtCompileTime == 1 && internal::is_convertible::value, + T*>* = 0) { + EIGEN_STATIC_ASSERT_VECTOR_SPECIFIC_SIZE(PlainObjectBase, 1) + m_storage.data()[0] = Scalar(val0); + } + + // Initialize a fixed size matrix from a pointer to raw data + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Scalar* data) { + this->_set_noalias(ConstMapType(data)); + } + + // Initialize an arbitrary matrix from a dense expression + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const DenseBase& other) { + this->_set_noalias(other); + } + + // Initialize an arbitrary matrix from an object convertible to the Derived type. 
+ template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const Derived& other) { + this->_set_noalias(other); + } + + // Initialize an arbitrary matrix from a generic Eigen expression + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const EigenBase& other) { + this->derived() = other; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const ReturnByValue& other) { + resize(other.rows(), other.cols()); + other.evalTo(this->derived()); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1(const RotationBase& r) { + this->derived() = r; + } + + // For fixed-size Array + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1( + const Scalar& val0, + std::enable_if_t::value && + internal::is_same::XprKind, ArrayXpr>::value, + T>* = 0) { + Base::setConstant(val0); + } + + // For fixed-size Array + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _init1( + const Index& val0, + std::enable_if_t<(!internal::is_same::value) && (internal::is_same::value) && + Base::SizeAtCompileTime != Dynamic && Base::SizeAtCompileTime != 1 && + internal::is_convertible::value && + internal::is_same::XprKind, ArrayXpr>::value, + T*>* = 0) { + Base::setConstant(val0); + } + + template + friend struct internal::matrix_swap_impl; + + public: +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** \internal + * \brief Override DenseBase::swap() since for dynamic-sized matrices + * of same type it is enough to swap the data pointers. 
+ */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void swap(DenseBase& other) { + enum {SwapPointers = internal::is_same::value && Base::SizeAtCompileTime == Dynamic}; + internal::matrix_swap_impl::run(this->derived(), other.derived()); + } + + /** \internal + * \brief const version forwarded to DenseBase::swap + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void swap(DenseBase const& other) { + Base::swap(other.derived()); + } + + enum {IsPlainObjectBase = 1}; +#endif + public: + // These apparently need to be down here for nvcc+icc to prevent duplicate + // Map symbol. + template + friend class Eigen::Map; + friend class Eigen::Map; + friend class Eigen::Map; +#if EIGEN_MAX_ALIGN_BYTES > 0 + // for EIGEN_MAX_ALIGN_BYTES==0, AlignedMax==Unaligned, and many compilers generate warnings for friend-ing a class + // twice. + friend class Eigen::Map; + friend class Eigen::Map; +#endif +}; + +namespace internal { + +template +struct conservative_resize_like_impl { + static constexpr bool IsRelocatable = std::is_trivially_copyable::value; + static void run(DenseBase& _this, Index rows, Index cols) { + if (_this.rows() == rows && _this.cols() == cols) return; + EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived) + + if (IsRelocatable && + ((Derived::IsRowMajor && _this.cols() == cols) || // row-major and we change only the number of rows + (!Derived::IsRowMajor && _this.rows() == rows))) // column-major and we change only the number of columns + { +#ifndef EIGEN_NO_DEBUG + internal::check_rows_cols_for_overflow::run(rows, cols); +#endif + _this.derived().m_storage.conservativeResize(rows * cols, rows, cols); + } else { + // The storage order does not allow us to use reallocation. 
+ Derived tmp(rows, cols); + const Index common_rows = numext::mini(rows, _this.rows()); + const Index common_cols = numext::mini(cols, _this.cols()); + tmp.block(0, 0, common_rows, common_cols) = _this.block(0, 0, common_rows, common_cols); + _this.derived().swap(tmp); + } + } + + static void run(DenseBase& _this, const DenseBase& other) { + if (_this.rows() == other.rows() && _this.cols() == other.cols()) return; + + // Note: Here is space for improvement. Basically, for conservativeResize(Index,Index), + // neither RowsAtCompileTime or ColsAtCompileTime must be Dynamic. If only one of the + // dimensions is dynamic, one could use either conservativeResize(Index rows, NoChange_t) or + // conservativeResize(NoChange_t, Index cols). For these methods new static asserts like + // EIGEN_STATIC_ASSERT_DYNAMIC_ROWS and EIGEN_STATIC_ASSERT_DYNAMIC_COLS would be good. + EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(Derived) + EIGEN_STATIC_ASSERT_DYNAMIC_SIZE(OtherDerived) + + if (IsRelocatable && + ((Derived::IsRowMajor && _this.cols() == other.cols()) || // row-major and we change only the number of rows + (!Derived::IsRowMajor && + _this.rows() == other.rows()))) // column-major and we change only the number of columns + { + const Index new_rows = other.rows() - _this.rows(); + const Index new_cols = other.cols() - _this.cols(); + _this.derived().m_storage.conservativeResize(other.size(), other.rows(), other.cols()); + if (new_rows > 0) + _this.bottomRightCorner(new_rows, other.cols()) = other.bottomRows(new_rows); + else if (new_cols > 0) + _this.bottomRightCorner(other.rows(), new_cols) = other.rightCols(new_cols); + } else { + // The storage order does not allow us to use reallocation. 
+ Derived tmp(other); + const Index common_rows = numext::mini(tmp.rows(), _this.rows()); + const Index common_cols = numext::mini(tmp.cols(), _this.cols()); + tmp.block(0, 0, common_rows, common_cols) = _this.block(0, 0, common_rows, common_cols); + _this.derived().swap(tmp); + } + } +}; + +// Here, the specialization for vectors inherits from the general matrix case +// to allow calling .conservativeResize(rows,cols) on vectors. +template +struct conservative_resize_like_impl + : conservative_resize_like_impl { + typedef conservative_resize_like_impl Base; + using Base::IsRelocatable; + using Base::run; + + static void run(DenseBase& _this, Index size) { + const Index new_rows = Derived::RowsAtCompileTime == 1 ? 1 : size; + const Index new_cols = Derived::RowsAtCompileTime == 1 ? size : 1; + if (IsRelocatable) + _this.derived().m_storage.conservativeResize(size, new_rows, new_cols); + else + Base::run(_this.derived(), new_rows, new_cols); + } + + static void run(DenseBase& _this, const DenseBase& other) { + if (_this.rows() == other.rows() && _this.cols() == other.cols()) return; + + const Index num_new_elements = other.size() - _this.size(); + + const Index new_rows = Derived::RowsAtCompileTime == 1 ? 1 : other.rows(); + const Index new_cols = Derived::RowsAtCompileTime == 1 ? 
other.cols() : 1; + if (IsRelocatable) + _this.derived().m_storage.conservativeResize(other.size(), new_rows, new_cols); + else + Base::run(_this.derived(), new_rows, new_cols); + + if (num_new_elements > 0) _this.tail(num_new_elements) = other.tail(num_new_elements); + } +}; + +template +struct matrix_swap_impl { + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE void run(MatrixTypeA& a, MatrixTypeB& b) { a.base().swap(b); } +}; + +template +struct matrix_swap_impl { + EIGEN_DEVICE_FUNC static inline void run(MatrixTypeA& a, MatrixTypeB& b) { + static_cast(a).m_storage.swap(static_cast(b).m_storage); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_DENSESTORAGEBASE_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Product.h b/o-voxel/third_party/eigen/Eigen/src/Core/Product.h new file mode 100644 index 0000000000000000000000000000000000000000..385f6aad0a2a004a3ec37256b5349c025743905f --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Product.h @@ -0,0 +1,307 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_PRODUCT_H +#define EIGEN_PRODUCT_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +template +class ProductImpl; + +namespace internal { + +template +struct traits> { + typedef remove_all_t LhsCleaned; + typedef remove_all_t RhsCleaned; + typedef traits LhsTraits; + typedef traits RhsTraits; + + typedef MatrixXpr XprKind; + + typedef typename ScalarBinaryOpTraits::Scalar, + typename traits::Scalar>::ReturnType Scalar; + typedef typename product_promote_storage_type::ret>::ret StorageKind; + typedef typename promote_index_type::type + StorageIndex; + + enum { + RowsAtCompileTime = LhsTraits::RowsAtCompileTime, + ColsAtCompileTime = RhsTraits::ColsAtCompileTime, + MaxRowsAtCompileTime = LhsTraits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsTraits::MaxColsAtCompileTime, + + // FIXME: only needed by GeneralMatrixMatrixTriangular + InnerSize = min_size_prefer_fixed(LhsTraits::ColsAtCompileTime, RhsTraits::RowsAtCompileTime), + + // The storage order is somewhat arbitrary here. The correct one will be determined through the evaluator. + Flags = (MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1) ? RowMajorBit + : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1) ? 0 + : (((LhsTraits::Flags & NoPreferredStorageOrderBit) && (RhsTraits::Flags & RowMajorBit)) || + ((RhsTraits::Flags & NoPreferredStorageOrderBit) && (LhsTraits::Flags & RowMajorBit))) + ? RowMajorBit + : NoPreferredStorageOrderBit + }; +}; + +struct TransposeProductEnum { + // convenience enumerations to specialize transposed products + enum : int { + Default = 0x00, + Matrix = 0x01, + Permutation = 0x02, + MatrixMatrix = (Matrix << 8) | Matrix, + MatrixPermutation = (Matrix << 8) | Permutation, + PermutationMatrix = (Permutation << 8) | Matrix + }; +}; +template +struct TransposeKind { + static constexpr int Kind = is_matrix_base_xpr::value ? TransposeProductEnum::Matrix + : is_permutation_base_xpr::value ? 
TransposeProductEnum::Permutation + : TransposeProductEnum::Default; +}; + +template +struct TransposeProductKind { + static constexpr int Kind = (TransposeKind::Kind << 8) | TransposeKind::Kind; +}; + +template ::Kind> +struct product_transpose_helper { + // by default, don't optimize the transposed product + using Derived = Product; + using Scalar = typename Derived::Scalar; + using TransposeType = Transpose; + using ConjugateTransposeType = CwiseUnaryOp, TransposeType>; + using AdjointType = std::conditional_t::IsComplex, ConjugateTransposeType, TransposeType>; + + // return (lhs * rhs)^T + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TransposeType run_transpose(const Derived& derived) { + return TransposeType(derived); + } + // return (lhs * rhs)^H + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AdjointType run_adjoint(const Derived& derived) { + return AdjointType(TransposeType(derived)); + } +}; + +template +struct product_transpose_helper { + // expand the transposed matrix-matrix product + using Derived = Product; + + using LhsScalar = typename traits::Scalar; + using LhsTransposeType = typename DenseBase::ConstTransposeReturnType; + using LhsConjugateTransposeType = CwiseUnaryOp, LhsTransposeType>; + using LhsAdjointType = + std::conditional_t::IsComplex, LhsConjugateTransposeType, LhsTransposeType>; + + using RhsScalar = typename traits::Scalar; + using RhsTransposeType = typename DenseBase::ConstTransposeReturnType; + using RhsConjugateTransposeType = CwiseUnaryOp, RhsTransposeType>; + using RhsAdjointType = + std::conditional_t::IsComplex, RhsConjugateTransposeType, RhsTransposeType>; + + using TransposeType = Product; + using AdjointType = Product; + + // return rhs^T * lhs^T + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TransposeType run_transpose(const Derived& derived) { + return TransposeType(RhsTransposeType(derived.rhs()), LhsTransposeType(derived.lhs())); + } + // return rhs^H * lhs^H + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AdjointType 
run_adjoint(const Derived& derived) { + return AdjointType(RhsAdjointType(RhsTransposeType(derived.rhs())), + LhsAdjointType(LhsTransposeType(derived.lhs()))); + } +}; +template +struct product_transpose_helper { + // expand the transposed permutation-matrix product + using Derived = Product; + + using LhsInverseType = typename PermutationBase::InverseReturnType; + + using RhsScalar = typename traits::Scalar; + using RhsTransposeType = typename DenseBase::ConstTransposeReturnType; + using RhsConjugateTransposeType = CwiseUnaryOp, RhsTransposeType>; + using RhsAdjointType = + std::conditional_t::IsComplex, RhsConjugateTransposeType, RhsTransposeType>; + + using TransposeType = Product; + using AdjointType = Product; + + // return rhs^T * lhs^-1 + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TransposeType run_transpose(const Derived& derived) { + return TransposeType(RhsTransposeType(derived.rhs()), LhsInverseType(derived.lhs())); + } + // return rhs^H * lhs^-1 + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AdjointType run_adjoint(const Derived& derived) { + return AdjointType(RhsAdjointType(RhsTransposeType(derived.rhs())), LhsInverseType(derived.lhs())); + } +}; +template +struct product_transpose_helper { + // expand the transposed matrix-permutation product + using Derived = Product; + + using LhsScalar = typename traits::Scalar; + using LhsTransposeType = typename DenseBase::ConstTransposeReturnType; + using LhsConjugateTransposeType = CwiseUnaryOp, LhsTransposeType>; + using LhsAdjointType = + std::conditional_t::IsComplex, LhsConjugateTransposeType, LhsTransposeType>; + + using RhsInverseType = typename PermutationBase::InverseReturnType; + + using TransposeType = Product; + using AdjointType = Product; + + // return rhs^-1 * lhs^T + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TransposeType run_transpose(const Derived& derived) { + return TransposeType(RhsInverseType(derived.rhs()), LhsTransposeType(derived.lhs())); + } + // return rhs^-1 * lhs^H + static 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AdjointType run_adjoint(const Derived& derived) { + return AdjointType(RhsInverseType(derived.rhs()), LhsAdjointType(LhsTransposeType(derived.lhs()))); + } +}; + +} // end namespace internal + +/** \class Product + * \ingroup Core_Module + * + * \brief Expression of the product of two arbitrary matrices or vectors + * + * \tparam Lhs_ the type of the left-hand side expression + * \tparam Rhs_ the type of the right-hand side expression + * + * This class represents an expression of the product of two arbitrary matrices. + * + * The other template parameters are: + * \tparam Option can be DefaultProduct, AliasFreeProduct, or LazyProduct + * + */ +template +class Product + : public ProductImpl::StorageKind, typename internal::traits::StorageKind, + internal::product_type::ret>::ret> { + public: + typedef Lhs_ Lhs; + typedef Rhs_ Rhs; + + typedef + typename ProductImpl::StorageKind, typename internal::traits::StorageKind, + internal::product_type::ret>::ret>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(Product) + + typedef typename internal::ref_selector::type LhsNested; + typedef typename internal::ref_selector::type RhsNested; + typedef internal::remove_all_t LhsNestedCleaned; + typedef internal::remove_all_t RhsNestedCleaned; + + using TransposeReturnType = typename internal::product_transpose_helper::TransposeType; + using AdjointReturnType = typename internal::product_transpose_helper::AdjointType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Product(const Lhs& lhs, const Rhs& rhs) : m_lhs(lhs), m_rhs(rhs) { + eigen_assert(lhs.cols() == rhs.rows() && "invalid matrix product" && + "if you wanted a coeff-wise or a dot product use the respective explicit functions"); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const noexcept { return m_lhs.rows(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept { return m_rhs.cols(); } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const 
LhsNestedCleaned& lhs() const { return m_lhs; } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const RhsNestedCleaned& rhs() const { return m_rhs; } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TransposeReturnType transpose() const { + return internal::product_transpose_helper::run_transpose(*this); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE AdjointReturnType adjoint() const { + return internal::product_transpose_helper::run_adjoint(*this); + } + + protected: + LhsNested m_lhs; + RhsNested m_rhs; +}; + +namespace internal { + +template ::ret> +class dense_product_base : public internal::dense_xpr_base>::type {}; + +/** Conversion to scalar for inner-products */ +template +class dense_product_base + : public internal::dense_xpr_base>::type { + typedef Product ProductXpr; + typedef typename internal::dense_xpr_base::type Base; + + public: + using Base::derived; + typedef typename Base::Scalar Scalar; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE operator const Scalar() const { + return internal::evaluator(derived()).coeff(0, 0); + } +}; + +} // namespace internal + +// Generic API dispatcher +template +class ProductImpl : public internal::generic_xpr_base, MatrixXpr, StorageKind>::type { + public: + typedef typename internal::generic_xpr_base, MatrixXpr, StorageKind>::type Base; +}; + +template +class ProductImpl : public internal::dense_product_base { + typedef Product Derived; + + public: + typedef typename internal::dense_product_base Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + protected: + enum { + IsOneByOne = (RowsAtCompileTime == 1 || RowsAtCompileTime == Dynamic) && + (ColsAtCompileTime == 1 || ColsAtCompileTime == Dynamic), + EnableCoeff = IsOneByOne || Option == LazyProduct + }; + + public: + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index row, Index col) const { + EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); + eigen_assert((Option == LazyProduct) || (this->rows() == 1 && this->cols() == 1)); + + return 
internal::evaluator(derived()).coeff(row, col); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index i) const { + EIGEN_STATIC_ASSERT(EnableCoeff, THIS_METHOD_IS_ONLY_FOR_INNER_OR_LAZY_PRODUCTS); + eigen_assert((Option == LazyProduct) || (this->rows() == 1 && this->cols() == 1)); + + return internal::evaluator(derived()).coeff(i); + } +}; + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/ProductEvaluators.h b/o-voxel/third_party/eigen/Eigen/src/Core/ProductEvaluators.h new file mode 100644 index 0000000000000000000000000000000000000000..274a782c4ab52d614b1da6a16cf988836b296c2c --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/ProductEvaluators.h @@ -0,0 +1,1287 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2011 Jitse Niesen +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_PRODUCTEVALUATORS_H +#define EIGEN_PRODUCTEVALUATORS_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +/** \internal + * Evaluator of a product expression. + * Since products require special treatments to handle all possible cases, + * we simply defer the evaluation logic to a product_evaluator class + * which offers more partial specialization possibilities. 
+ * + * \sa class product_evaluator + */ +template +struct evaluator> : public product_evaluator> { + typedef Product XprType; + typedef product_evaluator Base; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) : Base(xpr) {} +}; + +// Catch "scalar * ( A * B )" and transform it to "(A*scalar) * B" +// TODO we should apply that rule only if that's really helpful +template +struct evaluator_assume_aliasing, + const CwiseNullaryOp, Plain1>, + const Product>> { + static const bool value = true; +}; +template +struct evaluator, + const CwiseNullaryOp, Plain1>, + const Product>> + : public evaluator> { + typedef CwiseBinaryOp, + const CwiseNullaryOp, Plain1>, + const Product> + XprType; + typedef evaluator> Base; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) + : Base(xpr.lhs().functor().m_other * xpr.rhs().lhs() * xpr.rhs().rhs()) {} +}; + +template +struct evaluator, DiagIndex>> + : public evaluator, DiagIndex>> { + typedef Diagonal, DiagIndex> XprType; + typedef evaluator, DiagIndex>> Base; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit evaluator(const XprType& xpr) + : Base(Diagonal, DiagIndex>( + Product(xpr.nestedExpression().lhs(), xpr.nestedExpression().rhs()), xpr.index())) {} +}; + +// Helper class to perform a matrix product with the destination at hand. +// Depending on the sizes of the factors, there are different evaluation strategies +// as controlled by internal::product_type. 
+template ::Shape, + typename RhsShape = typename evaluator_traits::Shape, + int ProductType = internal::product_type::value> +struct generic_product_impl; + +template +struct evaluator_assume_aliasing> { + static const bool value = true; +}; + +// This is the default evaluator implementation for products: +// It creates a temporary and call generic_product_impl +template +struct product_evaluator, ProductTag, LhsShape, RhsShape> + : public evaluator::PlainObject> { + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + typedef evaluator Base; + enum { Flags = Base::Flags | EvalBeforeNestingBit }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr) + : m_result(xpr.rows(), xpr.cols()) { + internal::construct_at(this, m_result); + + // FIXME shall we handle nested_eval here?, + // if so, then we must take care at removing the call to nested_eval in the specializations (e.g., in + // permutation_matrix_product, transposition_matrix_product, etc.) + // typedef typename internal::nested_eval::type LhsNested; + // typedef typename internal::nested_eval::type RhsNested; + // typedef internal::remove_all_t LhsNestedCleaned; + // typedef internal::remove_all_t RhsNestedCleaned; + // + // const LhsNested lhs(xpr.lhs()); + // const RhsNested rhs(xpr.rhs()); + // + // generic_product_impl::evalTo(m_result, lhs, rhs); + + generic_product_impl::evalTo(m_result, xpr.lhs(), xpr.rhs()); + } + + protected: + PlainObject m_result; +}; + +// The following three shortcuts are enabled only if the scalar types match exactly. +// TODO: we could enable them for different scalar types when the product is not vectorized. 
+ +// Dense = Product +template +struct Assignment, internal::assign_op, Dense2Dense, + std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> { + typedef Product SrcXprType; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, + const internal::assign_op&) { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols); + // FIXME shall we handle nested_eval here? + generic_product_impl::evalTo(dst, src.lhs(), src.rhs()); + } +}; + +// Dense += Product +template +struct Assignment, internal::add_assign_op, Dense2Dense, + std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> { + typedef Product SrcXprType; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, + const internal::add_assign_op&) { + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + // FIXME shall we handle nested_eval here? + generic_product_impl::addTo(dst, src.lhs(), src.rhs()); + } +}; + +// Dense -= Product +template +struct Assignment, internal::sub_assign_op, Dense2Dense, + std::enable_if_t<(Options == DefaultProduct || Options == AliasFreeProduct)>> { + typedef Product SrcXprType; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, + const internal::sub_assign_op&) { + eigen_assert(dst.rows() == src.rows() && dst.cols() == src.cols()); + // FIXME shall we handle nested_eval here? 
+ generic_product_impl::subTo(dst, src.lhs(), src.rhs()); + } +}; + +// Dense ?= scalar * Product +// TODO we should apply that rule if that's really helpful +// for instance, this is not good for inner products +template +struct Assignment, + const CwiseNullaryOp, Plain>, + const Product>, + AssignFunc, Dense2Dense> { + typedef CwiseBinaryOp, + const CwiseNullaryOp, Plain>, + const Product> + SrcXprType; + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, + const AssignFunc& func) { + call_assignment_no_alias(dst, (src.lhs().functor().m_other * src.rhs().lhs()) * src.rhs().rhs(), func); + } +}; + +//---------------------------------------- +// Catch "Dense ?= xpr + Product<>" expression to save one temporary +// FIXME we could probably enable these rules for any product, i.e., not only Dense and DefaultProduct + +template +struct evaluator_assume_aliasing< + CwiseBinaryOp< + internal::scalar_sum_op::Scalar>, + const OtherXpr, const Product>, + DenseShape> { + static const bool value = true; +}; + +template +struct evaluator_assume_aliasing< + CwiseBinaryOp< + internal::scalar_difference_op::Scalar>, + const OtherXpr, const Product>, + DenseShape> { + static const bool value = true; +}; + +template +struct assignment_from_xpr_op_product { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(DstXprType& dst, const SrcXprType& src, + const InitialFunc& /*func*/) { + call_assignment_no_alias(dst, src.lhs(), Func1()); + call_assignment_no_alias(dst, src.rhs(), Func2()); + } +}; + +#define EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(ASSIGN_OP, BINOP, ASSIGN_OP2) \ + template \ + struct Assignment, const OtherXpr, \ + const Product>, \ + internal::ASSIGN_OP, Dense2Dense> \ + : assignment_from_xpr_op_product, \ + internal::ASSIGN_OP, \ + internal::ASSIGN_OP2> {} + +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_sum_op, add_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op, scalar_sum_op, add_assign_op); 
+EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op, scalar_sum_op, sub_assign_op); + +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(assign_op, scalar_difference_op, sub_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(add_assign_op, scalar_difference_op, sub_assign_op); +EIGEN_CATCH_ASSIGN_XPR_OP_PRODUCT(sub_assign_op, scalar_difference_op, add_assign_op); + +//---------------------------------------- + +template +struct generic_product_impl { + using impl = default_inner_product_impl; + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { + dst.coeffRef(0, 0) = impl::run(lhs, rhs); + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { + dst.coeffRef(0, 0) += impl::run(lhs, rhs); + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { + dst.coeffRef(0, 0) -= impl::run(lhs, rhs); + } +}; + +/*********************************************************************** + * Implementation of outer dense * dense vector product + ***********************************************************************/ + +// Column major result +template +void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func& func, + const false_type&) { + evaluator rhsEval(rhs); + ei_declare_local_nested_eval(Lhs, lhs, Rhs::SizeAtCompileTime, actual_lhs); + // FIXME if cols is large enough, then it might be useful to make sure that lhs is sequentially stored + // FIXME not very good if rhs is real and lhs complex while alpha is real too + const Index cols = dst.cols(); + for (Index j = 0; j < cols; ++j) func(dst.col(j), rhsEval.coeff(Index(0), j) * actual_lhs); +} + +// Row major result +template +void EIGEN_DEVICE_FUNC outer_product_selector_run(Dst& dst, const Lhs& lhs, const Rhs& rhs, const Func& func, + const true_type&) { + evaluator lhsEval(lhs); + ei_declare_local_nested_eval(Rhs, rhs, 
Lhs::SizeAtCompileTime, actual_rhs); + // FIXME if rows is large enough, then it might be useful to make sure that rhs is sequentially stored + // FIXME not very good if lhs is real and rhs complex while alpha is real too + const Index rows = dst.rows(); + for (Index i = 0; i < rows; ++i) func(dst.row(i), lhsEval.coeff(i, Index(0)) * actual_rhs); +} + +template +struct generic_product_impl { + template + struct is_row_major : bool_constant<(int(T::Flags) & RowMajorBit)> {}; + typedef typename Product::Scalar Scalar; + + // TODO it would be nice to be able to exploit our *_assign_op functors for that purpose + struct set { + template + EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { + dst.const_cast_derived() = src; + } + }; + struct add { + /** Add to dst. */ + template + EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { + dst.const_cast_derived() += src; + } + }; + struct sub { + template + EIGEN_DEVICE_FUNC void operator()(const Dst& dst, const Src& src) const { + dst.const_cast_derived() -= src; + } + }; + /** Scaled add. */ + struct adds { + Scalar m_scale; + /** Constructor */ + explicit adds(const Scalar& s) : m_scale(s) {} + /** Scaled add to dst. 
*/ + template + void EIGEN_DEVICE_FUNC operator()(const Dst& dst, const Src& src) const { + dst.const_cast_derived() += m_scale * src; + } + }; + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { + internal::outer_product_selector_run(dst, lhs, rhs, set(), is_row_major()); + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { + internal::outer_product_selector_run(dst, lhs, rhs, add(), is_row_major()); + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { + internal::outer_product_selector_run(dst, lhs, rhs, sub(), is_row_major()); + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, + const Scalar& alpha) { + internal::outer_product_selector_run(dst, lhs, rhs, adds(alpha), is_row_major()); + } +}; + +// This base class provides default implementations for evalTo, addTo, subTo, in terms of scaleAndAddTo +template +struct generic_product_impl_base { + typedef typename Product::Scalar Scalar; + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { + dst.setZero(); + scaleAndAddTo(dst, lhs, rhs, Scalar(1)); + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { + scaleAndAddTo(dst, lhs, rhs, Scalar(1)); + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { + scaleAndAddTo(dst, lhs, rhs, Scalar(-1)); + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dst& dst, const Lhs& lhs, const Rhs& rhs, + const Scalar& alpha) { + Derived::scaleAndAddTo(dst, lhs, rhs, alpha); + } +}; + +template +struct generic_product_impl + : generic_product_impl_base> { + typedef typename nested_eval::type LhsNested; 
+ typedef typename nested_eval::type RhsNested; + typedef typename Product::Scalar Scalar; + enum { Side = Lhs::IsVectorAtCompileTime ? OnTheLeft : OnTheRight }; + typedef internal::remove_all_t> MatrixType; + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, + const Scalar& alpha) { + // Fallback to inner product if both the lhs and rhs is a runtime vector. + if (lhs.rows() == 1 && rhs.cols() == 1) { + dst.coeffRef(0, 0) += alpha * lhs.row(0).conjugate().dot(rhs.col(0)); + return; + } + LhsNested actual_lhs(lhs); + RhsNested actual_rhs(rhs); + internal::gemv_dense_selector::HasUsableDirectAccess)>::run(actual_lhs, + actual_rhs, dst, + alpha); + } +}; + +template +struct generic_product_impl { + typedef typename Product::Scalar Scalar; + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { + // Same as: dst.noalias() = lhs.lazyProduct(rhs); + // but easier on the compiler side + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::assign_op()); + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void addTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { + // dst.noalias() += lhs.lazyProduct(rhs); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::add_assign_op()); + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void subTo(Dst& dst, const Lhs& lhs, const Rhs& rhs) { + // dst.noalias() -= lhs.lazyProduct(rhs); + call_assignment_no_alias(dst, lhs.lazyProduct(rhs), internal::sub_assign_op()); + } + + // This is a special evaluation path called from generic_product_impl<...,GemmProduct> in file GeneralMatrixMatrix.h + // This variant tries to extract scalar multiples from both the LHS and RHS and factor them out. 
For instance: + // dst {,+,-}= (s1*A)*(B*s2) + // will be rewritten as: + // dst {,+,-}= (s1*s2) * (A.lazyProduct(B)) + // There are at least four benefits of doing so: + // 1 - huge performance gain for heap-allocated matrix types as it save costly allocations. + // 2 - it is faster than simply by-passing the heap allocation through stack allocation. + // 3 - it makes this fallback consistent with the heavy GEMM routine. + // 4 - it fully by-passes huge stack allocation attempts when multiplying huge fixed-size matrices. + // (see https://stackoverflow.com/questions/54738495) + // For small fixed sizes matrices, however, the gains are less obvious, it is sometimes x2 faster, but sometimes x3 + // slower, and the behavior depends also a lot on the compiler... This is why this re-writing strategy is currently + // enabled only when falling back from the main GEMM. + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void eval_dynamic(Dst& dst, const Lhs& lhs, const Rhs& rhs, + const Func& func) { + enum { + HasScalarFactor = blas_traits::HasScalarFactor || blas_traits::HasScalarFactor, + ConjLhs = blas_traits::NeedToConjugate, + ConjRhs = blas_traits::NeedToConjugate + }; + // FIXME: in c++11 this should be auto, and extractScalarFactor should also return auto + // this is important for real*complex_mat + Scalar actualAlpha = combine_scalar_factors(lhs, rhs); + + eval_dynamic_impl(dst, blas_traits::extract(lhs).template conjugateIf(), + blas_traits::extract(rhs).template conjugateIf(), func, actualAlpha, + bool_constant()); + } + + protected: + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, + const Func& func, const Scalar& s /* == 1 */, + false_type) { + EIGEN_UNUSED_VARIABLE(s); + eigen_internal_assert(numext::is_exactly_one(s)); + call_restricted_packet_assignment_no_alias(dst, lhs.lazyProduct(rhs), func); + } + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void 
eval_dynamic_impl(Dst& dst, const LhsT& lhs, const RhsT& rhs, + const Func& func, const Scalar& s, true_type) { + call_restricted_packet_assignment_no_alias(dst, s * lhs.lazyProduct(rhs), func); + } +}; + +// This specialization enforces the use of a coefficient-based evaluation strategy +template +struct generic_product_impl + : generic_product_impl {}; + +// Case 2: Evaluate coeff by coeff +// +// This is mostly taken from CoeffBasedProduct.h +// The main difference is that we add an extra argument to the etor_product_*_impl::run() function +// for the inner dimension of the product, because evaluator object do not know their size. + +template +struct etor_product_coeff_impl; + +template +struct etor_product_packet_impl; + +template +struct product_evaluator, ProductTag, DenseShape, DenseShape> + : evaluator_base> { + typedef Product XprType; + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr) + : m_lhs(xpr.lhs()), + m_rhs(xpr.rhs()), + m_lhsImpl(m_lhs), // FIXME the creation of the evaluator objects should result in a no-op, but check that! + m_rhsImpl(m_rhs), // Moreover, they are only useful for the packet path, so we could completely disable + // them when not needed, or perhaps declare them on the fly on the packet method... We + // have experiment to check what's best. 
+ m_innerDim(xpr.lhs().cols()) { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::AddCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); +#if 0 + std::cerr << "LhsOuterStrideBytes= " << LhsOuterStrideBytes << "\n"; + std::cerr << "RhsOuterStrideBytes= " << RhsOuterStrideBytes << "\n"; + std::cerr << "LhsAlignment= " << LhsAlignment << "\n"; + std::cerr << "RhsAlignment= " << RhsAlignment << "\n"; + std::cerr << "CanVectorizeLhs= " << CanVectorizeLhs << "\n"; + std::cerr << "CanVectorizeRhs= " << CanVectorizeRhs << "\n"; + std::cerr << "CanVectorizeInner= " << CanVectorizeInner << "\n"; + std::cerr << "EvalToRowMajor= " << EvalToRowMajor << "\n"; + std::cerr << "Alignment= " << Alignment << "\n"; + std::cerr << "Flags= " << Flags << "\n"; +#endif + } + + // Everything below here is taken from CoeffBasedProduct.h + + typedef typename internal::nested_eval::type LhsNested; + typedef typename internal::nested_eval::type RhsNested; + + typedef internal::remove_all_t LhsNestedCleaned; + typedef internal::remove_all_t RhsNestedCleaned; + + typedef evaluator LhsEtorType; + typedef evaluator RhsEtorType; + + enum { + RowsAtCompileTime = LhsNestedCleaned::RowsAtCompileTime, + ColsAtCompileTime = RhsNestedCleaned::ColsAtCompileTime, + InnerSize = min_size_prefer_fixed(LhsNestedCleaned::ColsAtCompileTime, RhsNestedCleaned::RowsAtCompileTime), + MaxRowsAtCompileTime = LhsNestedCleaned::MaxRowsAtCompileTime, + MaxColsAtCompileTime = RhsNestedCleaned::MaxColsAtCompileTime + }; + + typedef typename find_best_packet::type LhsVecPacketType; + typedef typename find_best_packet::type RhsVecPacketType; + + enum { + + LhsCoeffReadCost = LhsEtorType::CoeffReadCost, + RhsCoeffReadCost = RhsEtorType::CoeffReadCost, + CoeffReadCost = InnerSize == 0 ? NumTraits::ReadCost + : InnerSize == Dynamic + ? 
HugeCost + : InnerSize * (NumTraits::MulCost + int(LhsCoeffReadCost) + int(RhsCoeffReadCost)) + + (InnerSize - 1) * NumTraits::AddCost, + + Unroll = CoeffReadCost <= EIGEN_UNROLLING_LIMIT, + + LhsFlags = LhsEtorType::Flags, + RhsFlags = RhsEtorType::Flags, + + LhsRowMajor = LhsFlags & RowMajorBit, + RhsRowMajor = RhsFlags & RowMajorBit, + + LhsVecPacketSize = unpacket_traits::size, + RhsVecPacketSize = unpacket_traits::size, + + // Here, we don't care about alignment larger than the usable packet size. + LhsAlignment = + plain_enum_min(LhsEtorType::Alignment, LhsVecPacketSize* int(sizeof(typename LhsNestedCleaned::Scalar))), + RhsAlignment = + plain_enum_min(RhsEtorType::Alignment, RhsVecPacketSize* int(sizeof(typename RhsNestedCleaned::Scalar))), + + SameType = is_same::value, + + CanVectorizeRhs = bool(RhsRowMajor) && (RhsFlags & PacketAccessBit) && (ColsAtCompileTime != 1), + CanVectorizeLhs = (!LhsRowMajor) && (LhsFlags & PacketAccessBit) && (RowsAtCompileTime != 1), + + EvalToRowMajor = (MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1) ? 1 + : (MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1) + ? 0 + : (bool(RhsRowMajor) && !CanVectorizeLhs), + + Flags = ((int(LhsFlags) | int(RhsFlags)) & HereditaryBits & ~RowMajorBit) | + (EvalToRowMajor ? RowMajorBit : 0) + // TODO enable vectorization for mixed types + | (SameType && (CanVectorizeLhs || CanVectorizeRhs) ? PacketAccessBit : 0) | + (XprType::IsVectorAtCompileTime ? LinearAccessBit : 0), + + LhsOuterStrideBytes = + int(LhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename LhsNestedCleaned::Scalar)), + RhsOuterStrideBytes = + int(RhsNestedCleaned::OuterStrideAtCompileTime) * int(sizeof(typename RhsNestedCleaned::Scalar)), + + Alignment = bool(CanVectorizeLhs) + ? (LhsOuterStrideBytes <= 0 || (int(LhsOuterStrideBytes) % plain_enum_max(1, LhsAlignment)) != 0 + ? 0 + : LhsAlignment) + : bool(CanVectorizeRhs) + ? 
(RhsOuterStrideBytes <= 0 || (int(RhsOuterStrideBytes) % plain_enum_max(1, RhsAlignment)) != 0 + ? 0 + : RhsAlignment) + : 0, + + /* CanVectorizeInner deserves special explanation. It does not affect the product flags. It is not used outside + * of Product. If the Product itself is not a packet-access expression, there is still a chance that the inner + * loop of the product might be vectorized. This is the meaning of CanVectorizeInner. Since it doesn't affect + * the Flags, it is safe to make this value depend on ActualPacketAccessBit, that doesn't affect the ABI. + */ + CanVectorizeInner = SameType && LhsRowMajor && (!RhsRowMajor) && + (int(LhsFlags) & int(RhsFlags) & ActualPacketAccessBit) && + (int(InnerSize) % packet_traits::size == 0) + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index row, Index col) const { + return (m_lhs.row(row).transpose().cwiseProduct(m_rhs.col(col))).sum(); + } + + /* Allow index-based non-packet access. It is impossible though to allow index-based packed access, + * which is why we don't set the LinearAccessBit. + * TODO: this seems possible when the result is a vector + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index index) const { + const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 0 : index; + const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? index : 0; + return (m_lhs.row(row).transpose().cwiseProduct(m_rhs.col(col))).sum(); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packet(Index row, Index col) const { + PacketType res; + typedef etor_product_packet_impl + PacketImpl; + PacketImpl::run(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res); + return res; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packet(Index index) const { + const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 
0 : index; + const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? index : 0; + return packet(row, col); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetSegment(Index row, Index col, Index begin, + Index count) const { + PacketType res; + typedef etor_product_packet_impl + PacketImpl; + PacketImpl::run_segment(row, col, m_lhsImpl, m_rhsImpl, m_innerDim, res, begin, count); + return res; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const PacketType packetSegment(Index index, Index begin, Index count) const { + const Index row = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? 0 : index; + const Index col = (RowsAtCompileTime == 1 || MaxRowsAtCompileTime == 1) ? index : 0; + return packetSegment(row, col, begin, count); + } + + protected: + add_const_on_value_type_t m_lhs; + add_const_on_value_type_t m_rhs; + + LhsEtorType m_lhsImpl; + RhsEtorType m_rhsImpl; + + // TODO: Get rid of m_innerDim if known at compile time + Index m_innerDim; +}; + +template +struct product_evaluator, LazyCoeffBasedProductMode, DenseShape, DenseShape> + : product_evaluator, CoeffBasedProductMode, DenseShape, DenseShape> { + typedef Product XprType; + typedef Product BaseProduct; + typedef product_evaluator Base; + enum { Flags = Base::Flags | EvalBeforeNestingBit }; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit product_evaluator(const XprType& xpr) + : Base(BaseProduct(xpr.lhs(), xpr.rhs())) {} +}; + +/**************************************** +*** Coeff based product, Packet path *** +****************************************/ + +template +struct etor_product_packet_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index innerDim, Packet& res) { + etor_product_packet_impl::run(row, col, lhs, rhs, + innerDim, res); + res = pmadd(pset1(lhs.coeff(row, Index(UnrollingIndex - 1))), + rhs.template packet(Index(UnrollingIndex - 1), col), res); + } + static 
EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index innerDim, Packet& res, Index begin, Index count) { + etor_product_packet_impl::run_segment( + row, col, lhs, rhs, innerDim, res, begin, count); + res = pmadd(pset1(lhs.coeff(row, Index(UnrollingIndex - 1))), + rhs.template packetSegment(Index(UnrollingIndex - 1), col, begin, count), res); + } +}; + +template +struct etor_product_packet_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index innerDim, Packet& res) { + etor_product_packet_impl::run(row, col, lhs, rhs, + innerDim, res); + res = pmadd(lhs.template packet(row, Index(UnrollingIndex - 1)), + pset1(rhs.coeff(Index(UnrollingIndex - 1), col)), res); + } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index innerDim, Packet& res, Index begin, Index count) { + etor_product_packet_impl::run_segment( + row, col, lhs, rhs, innerDim, res, begin, count); + res = pmadd(lhs.template packetSegment(row, Index(UnrollingIndex - 1), begin, count), + pset1(rhs.coeff(Index(UnrollingIndex - 1), col)), res); + } +}; + +template +struct etor_product_packet_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index /*innerDim*/, Packet& res) { + res = pmul(pset1(lhs.coeff(row, Index(0))), rhs.template packet(Index(0), col)); + } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index /*innerDim*/, Packet& res, Index begin, + Index count) { + res = pmul(pset1(lhs.coeff(row, Index(0))), + rhs.template packetSegment(Index(0), col, begin, count)); + } +}; + +template +struct etor_product_packet_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index /*innerDim*/, Packet& res) { 
+ res = pmul(lhs.template packet(row, Index(0)), pset1(rhs.coeff(Index(0), col))); + } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index /*innerDim*/, Packet& res, Index begin, + Index count) { + res = pmul(lhs.template packetSegment(row, Index(0), begin, count), + pset1(rhs.coeff(Index(0), col))); + } +}; + +template +struct etor_product_packet_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, + const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res) { + res = pset1(typename unpacket_traits::type(0)); + } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, + const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res, + Index /*begin*/, Index /*count*/) { + res = pset1(typename unpacket_traits::type(0)); + } +}; + +template +struct etor_product_packet_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, + const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res) { + res = pset1(typename unpacket_traits::type(0)); + } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index /*row*/, Index /*col*/, const Lhs& /*lhs*/, + const Rhs& /*rhs*/, Index /*innerDim*/, Packet& res, + Index /*begin*/, Index /*count*/) { + res = pset1(typename unpacket_traits::type(0)); + } +}; + +template +struct etor_product_packet_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index innerDim, Packet& res) { + res = pset1(typename unpacket_traits::type(0)); + for (Index i = 0; i < innerDim; ++i) + res = pmadd(pset1(lhs.coeff(row, i)), rhs.template packet(i, col), res); + } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index innerDim, Packet& res, Index begin, Index count) { + res = pset1(typename 
unpacket_traits::type(0)); + for (Index i = 0; i < innerDim; ++i) + res = pmadd(pset1(lhs.coeff(row, i)), rhs.template packetSegment(i, col, begin, count), + res); + } +}; + +template +struct etor_product_packet_impl { + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index innerDim, Packet& res) { + res = pset1(typename unpacket_traits::type(0)); + for (Index i = 0; i < innerDim; ++i) + res = pmadd(lhs.template packet(row, i), pset1(rhs.coeff(i, col)), res); + } + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run_segment(Index row, Index col, const Lhs& lhs, const Rhs& rhs, + Index innerDim, Packet& res, Index begin, Index count) { + res = pset1(typename unpacket_traits::type(0)); + for (Index i = 0; i < innerDim; ++i) + res = pmadd(lhs.template packetSegment(row, i, begin, count), pset1(rhs.coeff(i, col)), + res); + } +}; + +/*************************************************************************** + * Triangular products + ***************************************************************************/ +template +struct triangular_product_impl; + +template +struct generic_product_impl + : generic_product_impl_base> { + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { + triangular_product_impl::run( + dst, lhs.nestedExpression(), rhs, alpha); + } +}; + +template +struct generic_product_impl + : generic_product_impl_base> { + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { + triangular_product_impl::run( + dst, lhs, rhs.nestedExpression(), alpha); + } +}; + +/*************************************************************************** + * SelfAdjoint products + ***************************************************************************/ +template +struct selfadjoint_product_impl; + +template +struct 
generic_product_impl + : generic_product_impl_base> { + typedef typename Product::Scalar Scalar; + + template + static EIGEN_DEVICE_FUNC void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { + selfadjoint_product_impl::run( + dst, lhs.nestedExpression(), rhs, alpha); + } +}; + +template +struct generic_product_impl + : generic_product_impl_base> { + typedef typename Product::Scalar Scalar; + + template + static void scaleAndAddTo(Dest& dst, const Lhs& lhs, const Rhs& rhs, const Scalar& alpha) { + selfadjoint_product_impl::run( + dst, lhs, rhs.nestedExpression(), alpha); + } +}; + +/*************************************************************************** + * Diagonal products + ***************************************************************************/ + +template +struct diagonal_product_evaluator_base : evaluator_base { + typedef typename ScalarBinaryOpTraits::ReturnType Scalar; + + public: + enum { + CoeffReadCost = int(NumTraits::MulCost) + int(evaluator::CoeffReadCost) + + int(evaluator::CoeffReadCost), + + MatrixFlags = evaluator::Flags, + DiagFlags = evaluator::Flags, + + StorageOrder_ = (Derived::MaxRowsAtCompileTime == 1 && Derived::MaxColsAtCompileTime != 1) ? RowMajor + : (Derived::MaxColsAtCompileTime == 1 && Derived::MaxRowsAtCompileTime != 1) ? ColMajor + : MatrixFlags & RowMajorBit ? RowMajor + : ColMajor, + SameStorageOrder_ = int(StorageOrder_) == ((MatrixFlags & RowMajorBit) ? 
RowMajor : ColMajor), + + ScalarAccessOnDiag_ = !((int(StorageOrder_) == ColMajor && int(ProductOrder) == OnTheLeft) || + (int(StorageOrder_) == RowMajor && int(ProductOrder) == OnTheRight)), + SameTypes_ = is_same::value, + // FIXME currently we need same types, but in the future the next rule should be the one + // Vectorizable_ = bool(int(MatrixFlags)&PacketAccessBit) && ((!_PacketOnDiag) || (SameTypes_ && + // bool(int(DiagFlags)&PacketAccessBit))), + Vectorizable_ = bool(int(MatrixFlags) & PacketAccessBit) && SameTypes_ && + (SameStorageOrder_ || (MatrixFlags & LinearAccessBit) == LinearAccessBit) && + (ScalarAccessOnDiag_ || (bool(int(DiagFlags) & PacketAccessBit))), + LinearAccessMask_ = + (MatrixType::RowsAtCompileTime == 1 || MatrixType::ColsAtCompileTime == 1) ? LinearAccessBit : 0, + Flags = + ((HereditaryBits | LinearAccessMask_) & (unsigned int)(MatrixFlags)) | (Vectorizable_ ? PacketAccessBit : 0), + Alignment = evaluator::Alignment, + + AsScalarProduct = + (DiagonalType::SizeAtCompileTime == 1) || + (DiagonalType::SizeAtCompileTime == Dynamic && MatrixType::RowsAtCompileTime == 1 && + ProductOrder == OnTheLeft) || + (DiagonalType::SizeAtCompileTime == Dynamic && MatrixType::ColsAtCompileTime == 1 && ProductOrder == OnTheRight) + }; + + EIGEN_DEVICE_FUNC diagonal_product_evaluator_base(const MatrixType& mat, const DiagonalType& diag) + : m_diagImpl(diag), m_matImpl(mat) { + EIGEN_INTERNAL_CHECK_COST_VALUE(NumTraits::MulCost); + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index idx) const { + if (AsScalarProduct) + return m_diagImpl.coeff(0) * m_matImpl.coeff(idx); + else + return m_diagImpl.coeff(idx) * m_matImpl.coeff(idx); + } + + protected: + template + EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::true_type) const { + return internal::pmul(m_matImpl.template packet(row, col), + internal::pset1(m_diagImpl.coeff(id))); + } + + template + 
EIGEN_STRONG_INLINE PacketType packet_impl(Index row, Index col, Index id, internal::false_type) const { + enum { + InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, + DiagonalPacketLoadMode = plain_enum_min( + LoadMode, + ((InnerSize % 16) == 0) ? int(Aligned16) : int(evaluator::Alignment)) // FIXME hardcoded 16!! + }; + return internal::pmul(m_matImpl.template packet(row, col), + m_diagImpl.template packet(id)); + } + + template + EIGEN_STRONG_INLINE PacketType packet_segment_impl(Index row, Index col, Index id, Index begin, Index count, + internal::true_type) const { + return internal::pmul(m_matImpl.template packetSegment(row, col, begin, count), + internal::pset1(m_diagImpl.coeff(id))); + } + + template + EIGEN_STRONG_INLINE PacketType packet_segment_impl(Index row, Index col, Index id, Index begin, Index count, + internal::false_type) const { + enum { + InnerSize = (MatrixType::Flags & RowMajorBit) ? MatrixType::ColsAtCompileTime : MatrixType::RowsAtCompileTime, + DiagonalPacketLoadMode = plain_enum_min( + LoadMode, + ((InnerSize % 16) == 0) ? int(Aligned16) : int(evaluator::Alignment)) // FIXME hardcoded 16!! 
+ }; + return internal::pmul(m_matImpl.template packetSegment(row, col, begin, count), + m_diagImpl.template packetSegment(id, begin, count)); + } + + evaluator m_diagImpl; + evaluator m_matImpl; +}; + +// diagonal * dense +template +struct product_evaluator, ProductTag, DiagonalShape, DenseShape> + : diagonal_product_evaluator_base, + OnTheLeft> { + typedef diagonal_product_evaluator_base, + OnTheLeft> + Base; + using Base::coeff; + using Base::m_diagImpl; + using Base::m_matImpl; + typedef typename Base::Scalar Scalar; + + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + typedef typename Lhs::DiagonalVectorType DiagonalType; + + static constexpr int StorageOrder = Base::StorageOrder_; + using IsRowMajor_t = bool_constant; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.rhs(), xpr.lhs().diagonal()) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const { + return m_diagImpl.coeff(row) * m_matImpl.coeff(row, col); + } + +#ifndef EIGEN_GPUCC + template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case. + // See also similar calls below. + return this->template packet_impl(row, col, row, IsRowMajor_t()); + } + + template + EIGEN_STRONG_INLINE PacketType packet(Index idx) const { + return packet(int(StorageOrder) == ColMajor ? idx : 0, + int(StorageOrder) == ColMajor ? 0 : idx); + } + + template + EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + // FIXME: NVCC used to complain about the template keyword, but we have to check whether this is still the case. + // See also similar calls below. 
+ return this->template packet_segment_impl(row, col, row, begin, count, IsRowMajor_t()); + } + + template + EIGEN_STRONG_INLINE PacketType packetSegment(Index idx, Index begin, Index count) const { + return packetSegment(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx, + begin, count); + } +#endif +}; + +// dense * diagonal +template +struct product_evaluator, ProductTag, DenseShape, DiagonalShape> + : diagonal_product_evaluator_base, + OnTheRight> { + typedef diagonal_product_evaluator_base, + OnTheRight> + Base; + using Base::coeff; + using Base::m_diagImpl; + using Base::m_matImpl; + typedef typename Base::Scalar Scalar; + + typedef Product XprType; + typedef typename XprType::PlainObject PlainObject; + + static constexpr int StorageOrder = Base::StorageOrder_; + using IsColMajor_t = bool_constant; + + EIGEN_DEVICE_FUNC explicit product_evaluator(const XprType& xpr) : Base(xpr.lhs(), xpr.rhs().diagonal()) {} + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar coeff(Index row, Index col) const { + return m_matImpl.coeff(row, col) * m_diagImpl.coeff(col); + } + +#ifndef EIGEN_GPUCC + template + EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + return this->template packet_impl(row, col, col, IsColMajor_t()); + } + + template + EIGEN_STRONG_INLINE PacketType packet(Index idx) const { + return packet(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 0 : idx); + } + + template + EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + return this->template packet_segment_impl(row, col, col, begin, count, IsColMajor_t()); + } + + template + EIGEN_STRONG_INLINE PacketType packetSegment(Index idx, Index begin, Index count) const { + return packetSegment(StorageOrder == ColMajor ? idx : 0, StorageOrder == ColMajor ? 
0 : idx, + begin, count); + } +#endif +}; + +/*************************************************************************** + * Products with permutation matrices + ***************************************************************************/ + +/** \internal + * \class permutation_matrix_product + * Internal helper class implementing the product between a permutation matrix and a matrix. + * This class is specialized for DenseShape below and for SparseShape in SparseCore/SparsePermutation.h + */ +template +struct permutation_matrix_product; + +template +struct permutation_matrix_product { + typedef typename nested_eval::type MatrixType; + typedef remove_all_t MatrixTypeCleaned; + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const PermutationType& perm, + const ExpressionType& xpr) { + MatrixType mat(xpr); + const Index n = Side == OnTheLeft ? mat.rows() : mat.cols(); + // FIXME we need an is_same for expression that is not sensitive to constness. For instance + // is_same_xpr, Block >::value should be true. + // if(is_same::value && extract_data(dst) == extract_data(mat)) + if (is_same_dense(dst, mat)) { + // apply the permutation inplace + Matrix mask(perm.size()); + mask.fill(false); + Index r = 0; + while (r < perm.size()) { + // search for the next seed + while (r < perm.size() && mask[r]) r++; + if (r >= perm.size()) break; + // we got one, let's follow it until we are back to the seed + Index k0 = r++; + Index kPrev = k0; + mask.coeffRef(k0) = true; + for (Index k = perm.indices().coeff(k0); k != k0; k = perm.indices().coeff(k)) { + Block(dst, k) + .swap(Block < Dest, Side == OnTheLeft ? 1 : Dest::RowsAtCompileTime, + Side == OnTheRight + ? 1 + : Dest::ColsAtCompileTime > (dst, ((Side == OnTheLeft) ^ Transposed) ? k0 : kPrev)); + + mask.coeffRef(k) = true; + kPrev = k; + } + } + } else { + for (Index i = 0; i < n; ++i) { + Block( + dst, ((Side == OnTheLeft) ^ Transposed) ? 
perm.indices().coeff(i) : i) + + = + + Block < const MatrixTypeCleaned, + Side == OnTheLeft ? 1 : MatrixTypeCleaned::RowsAtCompileTime, + Side == OnTheRight ? 1 + : MatrixTypeCleaned::ColsAtCompileTime > + (mat, ((Side == OnTheRight) ^ Transposed) ? perm.indices().coeff(i) : i); + } + } + } +}; + +template +struct generic_product_impl { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) { + permutation_matrix_product::run(dst, lhs, rhs); + } +}; + +template +struct generic_product_impl { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) { + permutation_matrix_product::run(dst, rhs, lhs); + } +}; + +template +struct generic_product_impl, Rhs, PermutationShape, MatrixShape, ProductTag> { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Inverse& lhs, const Rhs& rhs) { + permutation_matrix_product::run(dst, lhs.nestedExpression(), rhs); + } +}; + +template +struct generic_product_impl, MatrixShape, PermutationShape, ProductTag> { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Inverse& rhs) { + permutation_matrix_product::run(dst, rhs.nestedExpression(), lhs); + } +}; + +/*************************************************************************** + * Products with transpositions matrices + ***************************************************************************/ + +// FIXME could we unify Transpositions and Permutation into a single "shape"?? + +/** \internal + * \class transposition_matrix_product + * Internal helper class implementing the product between a permutation matrix and a matrix. 
+ */ +template +struct transposition_matrix_product { + typedef typename nested_eval::type MatrixType; + typedef remove_all_t MatrixTypeCleaned; + + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void run(Dest& dst, const TranspositionType& tr, + const ExpressionType& xpr) { + MatrixType mat(xpr); + typedef typename TranspositionType::StorageIndex StorageIndex; + const Index size = tr.size(); + StorageIndex j = 0; + + if (!is_same_dense(dst, mat)) dst = mat; + + for (Index k = (Transposed ? size - 1 : 0); Transposed ? k >= 0 : k < size; Transposed ? --k : ++k) + if (Index(j = tr.coeff(k)) != k) { + if (Side == OnTheLeft) + dst.row(k).swap(dst.row(j)); + else if (Side == OnTheRight) + dst.col(k).swap(dst.col(j)); + } + } +}; + +template +struct generic_product_impl { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) { + transposition_matrix_product::run(dst, lhs, rhs); + } +}; + +template +struct generic_product_impl { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) { + transposition_matrix_product::run(dst, rhs, lhs); + } +}; + +template +struct generic_product_impl, Rhs, TranspositionsShape, MatrixShape, ProductTag> { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Transpose& lhs, const Rhs& rhs) { + transposition_matrix_product::run(dst, lhs.nestedExpression(), rhs); + } +}; + +template +struct generic_product_impl, MatrixShape, TranspositionsShape, ProductTag> { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Transpose& rhs) { + transposition_matrix_product::run(dst, rhs.nestedExpression(), lhs); + } +}; + +/*************************************************************************** + * skew symmetric products + * for now we just call the generic implementation + 
***************************************************************************/ +template +struct generic_product_impl { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) { + generic_product_impl::evalTo(dst, lhs, + rhs); + } +}; + +template +struct generic_product_impl { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) { + generic_product_impl::evalTo(dst, lhs, + rhs); + } +}; + +template +struct generic_product_impl { + template + static EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void evalTo(Dest& dst, const Lhs& lhs, const Rhs& rhs) { + generic_product_impl::evalTo(dst, lhs, rhs); + } +}; + +template +struct generic_product_impl + : generic_product_impl {}; + +template +struct generic_product_impl + : generic_product_impl {}; + +template +struct generic_product_impl + : generic_product_impl {}; + +template +struct generic_product_impl + : generic_product_impl {}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_PRODUCT_EVALUATORS_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Random.h b/o-voxel/third_party/eigen/Eigen/src/Core/Random.h new file mode 100644 index 0000000000000000000000000000000000000000..5e5c34085b0c53068cd3ab790589e17e9c9e6b8d --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Random.h @@ -0,0 +1,207 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_RANDOM_H +#define EIGEN_RANDOM_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template +struct scalar_random_op { + inline const Scalar operator()() const { return random(); } +}; + +template +struct functor_traits > { + enum { Cost = 5 * NumTraits::MulCost, PacketAccess = false, IsRepeatable = false }; +}; + +} // end namespace internal + +/** \returns a random matrix expression + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * The parameters \a rows and \a cols are the number of rows and of columns of + * the returned matrix. Must be compatible with this MatrixBase type. + * + * \not_reentrant + * + * This variant is meant to be used for dynamic-size matrix types. For fixed-size types, + * it is redundant to pass \a rows and \a cols as arguments, so Random() should be used + * instead. + * + * + * Example: \include MatrixBase_random_int_int.cpp + * Output: \verbinclude MatrixBase_random_int_int.out + * + * This expression has the "evaluate before nesting" flag so that it will be evaluated into + * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected + * behavior with expressions involving random matrices. + * + * See DenseBase::NullaryExpr(Index, const CustomNullaryOp&) for an example using C++11 random generators. + * + * \sa DenseBase::setRandom(), DenseBase::Random(Index), DenseBase::Random() + */ +template +inline const typename DenseBase::RandomReturnType DenseBase::Random(Index rows, Index cols) { + return NullaryExpr(rows, cols, internal::scalar_random_op()); +} + +/** \returns a random vector expression + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * The parameter \a size is the size of the returned vector. 
+ * Must be compatible with this MatrixBase type. + * + * \only_for_vectors + * \not_reentrant + * + * This variant is meant to be used for dynamic-size vector types. For fixed-size types, + * it is redundant to pass \a size as argument, so Random() should be used + * instead. + * + * Example: \include MatrixBase_random_int.cpp + * Output: \verbinclude MatrixBase_random_int.out + * + * This expression has the "evaluate before nesting" flag so that it will be evaluated into + * a temporary vector whenever it is nested in a larger expression. This prevents unexpected + * behavior with expressions involving random matrices. + * + * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random() + */ +template +inline const typename DenseBase::RandomReturnType DenseBase::Random(Index size) { + return NullaryExpr(size, internal::scalar_random_op()); +} + +/** \returns a fixed-size random matrix or vector expression + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * This variant is only for fixed-size MatrixBase types. For dynamic-size types, you + * need to use the variants taking size arguments. + * + * Example: \include MatrixBase_random.cpp + * Output: \verbinclude MatrixBase_random.out + * + * This expression has the "evaluate before nesting" flag so that it will be evaluated into + * a temporary matrix whenever it is nested in a larger expression. This prevents unexpected + * behavior with expressions involving random matrices. + * + * \not_reentrant + * + * \sa DenseBase::setRandom(), DenseBase::Random(Index,Index), DenseBase::Random(Index) + */ +template +inline const typename DenseBase::RandomReturnType DenseBase::Random() { + return NullaryExpr(RowsAtCompileTime, ColsAtCompileTime, internal::scalar_random_op()); +} + +/** Sets all coefficients in this expression to random values. 
+ * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * \not_reentrant + * + * Example: \include MatrixBase_setRandom.cpp + * Output: \verbinclude MatrixBase_setRandom.out + * + * \sa class CwiseNullaryOp, setRandom(Index), setRandom(Index,Index) + */ +template +EIGEN_DEVICE_FUNC inline Derived& DenseBase::setRandom() { + return *this = Random(rows(), cols()); +} + +/** Resizes to the given \a newSize, and sets all coefficients in this expression to random values. + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * \only_for_vectors + * \not_reentrant + * + * Example: \include Matrix_setRandom_int.cpp + * Output: \verbinclude Matrix_setRandom_int.out + * + * \sa DenseBase::setRandom(), setRandom(Index,Index), class CwiseNullaryOp, DenseBase::Random() + */ +template +EIGEN_STRONG_INLINE Derived& PlainObjectBase::setRandom(Index newSize) { + resize(newSize); + return setRandom(); +} + +/** Resizes to the given size, and sets all coefficients in this expression to random values. + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * \not_reentrant + * + * \param rows the new number of rows + * \param cols the new number of columns + * + * Example: \include Matrix_setRandom_int_int.cpp + * Output: \verbinclude Matrix_setRandom_int_int.out + * + * \sa DenseBase::setRandom(), setRandom(Index), class CwiseNullaryOp, DenseBase::Random() + */ +template +EIGEN_STRONG_INLINE Derived& PlainObjectBase::setRandom(Index rows, Index cols) { + resize(rows, cols); + return setRandom(); +} + +/** Resizes to the given size, changing only the number of columns, and sets all + * coefficients in this expression to random values. 
For the parameter of type + * NoChange_t, just pass the special value \c NoChange. + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * \not_reentrant + * + * \sa DenseBase::setRandom(), setRandom(Index), setRandom(Index, NoChange_t), class CwiseNullaryOp, DenseBase::Random() + */ +template +EIGEN_STRONG_INLINE Derived& PlainObjectBase::setRandom(NoChange_t, Index cols) { + return setRandom(rows(), cols); +} + +/** Resizes to the given size, changing only the number of rows, and sets all + * coefficients in this expression to random values. For the parameter of type + * NoChange_t, just pass the special value \c NoChange. + * + * Numbers are uniformly spread through their whole definition range for integer types, + * and in the [-1:1] range for floating point scalar types. + * + * \not_reentrant + * + * \sa DenseBase::setRandom(), setRandom(Index), setRandom(NoChange_t, Index), class CwiseNullaryOp, DenseBase::Random() + */ +template +EIGEN_STRONG_INLINE Derived& PlainObjectBase::setRandom(Index rows, NoChange_t) { + return setRandom(rows, cols()); +} + +} // end namespace Eigen + +#endif // EIGEN_RANDOM_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/RandomImpl.h b/o-voxel/third_party/eigen/Eigen/src/Core/RandomImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..993dfeaf187f314d4ca7525ba74e7d1338d20b67 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/RandomImpl.h @@ -0,0 +1,262 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2024 Charles Schlosser +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_RANDOM_IMPL_H +#define EIGEN_RANDOM_IMPL_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +/**************************************************************************** + * Implementation of random * + ****************************************************************************/ + +template +struct random_default_impl {}; + +template +struct random_impl : random_default_impl::IsComplex, NumTraits::IsInteger> {}; + +template +struct random_retval { + typedef Scalar type; +}; + +template +inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random(const Scalar& x, const Scalar& y) { + return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(x, y); +} + +template +inline EIGEN_MATHFUNC_RETVAL(random, Scalar) random() { + return EIGEN_MATHFUNC_IMPL(random, Scalar)::run(); +} + +// TODO: replace or provide alternatives to this, e.g. std::random_device +struct eigen_random_device { + using ReturnType = int; + static constexpr int Entropy = meta_floor_log2<(unsigned int)(RAND_MAX) + 1>::value; + static constexpr ReturnType Highest = RAND_MAX; + static EIGEN_DEVICE_FUNC inline ReturnType run() { return std::rand(); } +}; + +// Fill a built-in unsigned integer with numRandomBits beginning with the least significant bit +template +struct random_bits_impl { + EIGEN_STATIC_ASSERT(std::is_unsigned::value, SCALAR MUST BE A BUILT - IN UNSIGNED INTEGER) + using RandomDevice = eigen_random_device; + using RandomReturnType = typename RandomDevice::ReturnType; + static constexpr int kEntropy = RandomDevice::Entropy; + static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT; + // return a Scalar filled with numRandomBits beginning from the least significant bit + static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) { + eigen_assert((numRandomBits >= 0) && (numRandomBits <= kTotalBits)); + const Scalar mask = Scalar(-1) >> ((kTotalBits - numRandomBits) & (kTotalBits - 1)); + Scalar randomBits = 0; + for (int 
shift = 0; shift < numRandomBits; shift += kEntropy) { + RandomReturnType r = RandomDevice::run(); + randomBits |= static_cast(r) << shift; + } + // clear the excess bits + randomBits &= mask; + return randomBits; + } +}; + +template +EIGEN_DEVICE_FUNC inline BitsType getRandomBits(int numRandomBits) { + return random_bits_impl::run(numRandomBits); +} + +// random implementation for a built-in floating point type +template ::value> +struct random_float_impl { + using BitsType = typename numext::get_integer_by_size::unsigned_type; + static constexpr EIGEN_DEVICE_FUNC inline int mantissaBits() { + const int digits = NumTraits::digits(); + return digits - 1; + } + static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) { + eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissaBits()); + BitsType randomBits = getRandomBits(numRandomBits); + // if fewer than MantissaBits is requested, shift them to the left + randomBits <<= (mantissaBits() - numRandomBits); + // randomBits is in the half-open interval [2,4) + randomBits |= numext::bit_cast(Scalar(2)); + // result is in the half-open interval [-1,1) + Scalar result = numext::bit_cast(randomBits) - Scalar(3); + return result; + } +}; +// random implementation for a custom floating point type +// uses double as the implementation with a mantissa with a size equal to either the target scalar's mantissa or that of +// double, whichever is smaller +template +struct random_float_impl { + static EIGEN_DEVICE_FUNC inline int mantissaBits() { + const int digits = NumTraits::digits(); + constexpr int kDoubleDigits = NumTraits::digits(); + return numext::mini(digits, kDoubleDigits) - 1; + } + static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) { + eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissaBits()); + Scalar result = static_cast(random_float_impl::run(numRandomBits)); + return result; + } +}; + +#if !EIGEN_COMP_NVCC +// random implementation for long double +// this specialization is not 
compatible with double-double scalars +template ::digits != (2 * std::numeric_limits::digits)))> +struct random_longdouble_impl { + static constexpr int Size = sizeof(long double); + static constexpr EIGEN_DEVICE_FUNC int mantissaBits() { return NumTraits::digits() - 1; } + static EIGEN_DEVICE_FUNC inline long double run(int numRandomBits) { + eigen_assert(numRandomBits >= 0 && numRandomBits <= mantissaBits()); + EIGEN_USING_STD(memcpy); + int numLowBits = numext::mini(numRandomBits, 64); + int numHighBits = numext::maxi(numRandomBits - 64, 0); + uint64_t randomBits[2]; + long double result = 2.0L; + memcpy(&randomBits, &result, Size); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + randomBits[0] |= getRandomBits(numLowBits); + randomBits[1] |= getRandomBits(numHighBits); +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + randomBits[0] |= getRandomBits(numHighBits); + randomBits[1] |= getRandomBits(numLowBits); +#else +#error Unexpected or undefined __BYTE_ORDER__ +#endif + memcpy(&result, &randomBits, Size); + result -= 3.0L; + return result; + } +}; +template <> +struct random_longdouble_impl { + static constexpr EIGEN_DEVICE_FUNC int mantissaBits() { return NumTraits::digits() - 1; } + static EIGEN_DEVICE_FUNC inline long double run(int numRandomBits) { + return static_cast(random_float_impl::run(numRandomBits)); + } +}; +template <> +struct random_float_impl : random_longdouble_impl<> {}; +#endif + +template +struct random_default_impl { + using Impl = random_float_impl; + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y, int numRandomBits) { + Scalar half_x = Scalar(0.5) * x; + Scalar half_y = Scalar(0.5) * y; + Scalar result = (half_x + half_y) + (half_y - half_x) * run(numRandomBits); + // result is in the half-open interval [x, y) -- provided that x < y + return result; + } + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { + return run(x, y, Impl::mantissaBits()); + } + static EIGEN_DEVICE_FUNC inline 
Scalar run(int numRandomBits) { return Impl::run(numRandomBits); } + static EIGEN_DEVICE_FUNC inline Scalar run() { return run(Impl::mantissaBits()); } +}; + +template ::IsSigned, bool BuiltIn = std::is_integral::value> +struct random_int_impl; + +// random implementation for a built-in unsigned integer type +template +struct random_int_impl { + static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT; + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { + if (y <= x) return x; + Scalar range = y - x; + // handle edge case where [x,y] spans the entire range of Scalar + if (range == NumTraits::highest()) return run(); + Scalar count = range + 1; + // calculate the number of random bits needed to fill range + int numRandomBits = log2_ceil(count); + Scalar randomBits; + do { + randomBits = getRandomBits(numRandomBits); + // if the random draw is outside [0, range), try again (rejection sampling) + // in the worst-case scenario, the probability of rejection is: 1/2 - 1/2^numRandomBits < 50% + } while (randomBits >= count); + Scalar result = x + randomBits; + return result; + } + static EIGEN_DEVICE_FUNC inline Scalar run() { return getRandomBits(kTotalBits); } +}; + +// random implementation for a built-in signed integer type +template +struct random_int_impl { + static constexpr int kTotalBits = sizeof(Scalar) * CHAR_BIT; + using BitsType = typename make_unsigned::type; + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { + if (y <= x) return x; + // Avoid overflow by representing `range` as an unsigned type + BitsType range = static_cast(y) - static_cast(x); + BitsType randomBits = random_int_impl::run(0, range); + // Avoid overflow in the case where `x` is negative and there is a large range so + // `randomBits` would also be negative if cast to `Scalar` first. 
+ Scalar result = static_cast(static_cast(x) + randomBits); + return result; + } + static EIGEN_DEVICE_FUNC inline Scalar run() { return static_cast(getRandomBits(kTotalBits)); } +}; + +// todo: custom integers +template +struct random_int_impl { + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar&, const Scalar&) { return run(); } + static EIGEN_DEVICE_FUNC inline Scalar run() { + eigen_assert(std::false_type::value && "RANDOM FOR CUSTOM INTEGERS NOT YET SUPPORTED"); + return Scalar(0); + } +}; + +template +struct random_default_impl : random_int_impl {}; + +template <> +struct random_impl { + static EIGEN_DEVICE_FUNC inline bool run(const bool& x, const bool& y) { + if (y <= x) return x; + return run(); + } + static EIGEN_DEVICE_FUNC inline bool run() { return getRandomBits(1) ? true : false; } +}; + +template +struct random_default_impl { + typedef typename NumTraits::Real RealScalar; + using Impl = random_impl; + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y, int numRandomBits) { + return Scalar(Impl::run(x.real(), y.real(), numRandomBits), Impl::run(x.imag(), y.imag(), numRandomBits)); + } + static EIGEN_DEVICE_FUNC inline Scalar run(const Scalar& x, const Scalar& y) { + return Scalar(Impl::run(x.real(), y.real()), Impl::run(x.imag(), y.imag())); + } + static EIGEN_DEVICE_FUNC inline Scalar run(int numRandomBits) { + return Scalar(Impl::run(numRandomBits), Impl::run(numRandomBits)); + } + static EIGEN_DEVICE_FUNC inline Scalar run() { return Scalar(Impl::run(), Impl::run()); } +}; + +} // namespace internal +} // namespace Eigen + +#endif // EIGEN_RANDOM_IMPL_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/RealView.h b/o-voxel/third_party/eigen/Eigen/src/Core/RealView.h new file mode 100644 index 0000000000000000000000000000000000000000..4c1e3f1a95566dc3692651c5027fd9175d0d73bd --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/RealView.h @@ -0,0 +1,292 @@ +// This file is part of Eigen, a lightweight C++ 
template library +// for linear algebra. +// +// Copyright (C) 2025 Charlie Schlosser +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REALVIEW_H +#define EIGEN_REALVIEW_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +// Write access and vectorization requires array-oriented access to the real and imaginary components. +// From https://en.cppreference.com/w/cpp/numeric/complex.html: +// For any pointer to an element of an array of std::complex named p and any valid array index i, +// reinterpret_cast(p)[2 * i] is the real part of the complex number p[i], and +// reinterpret_cast(p)[2 * i + 1] is the imaginary part of the complex number p[i]. + +template +struct complex_array_access : std::false_type {}; +template +struct complex_array_access> : std::true_type {}; + +template +struct traits> : public traits { + template + static constexpr int double_size(T size, bool times_two) { + int size_as_int = int(size); + if (size_as_int == Dynamic) return Dynamic; + return times_two ? (2 * size_as_int) : size_as_int; + } + + using Base = traits; + using ComplexScalar = typename Base::Scalar; + using Scalar = typename NumTraits::Real; + + static constexpr bool ArrayAccess = complex_array_access::value; + static constexpr int ActualDirectAccessBit = ArrayAccess ? DirectAccessBit : 0; + static constexpr int ActualLvaluebit = !std::is_const::value && ArrayAccess ? LvalueBit : 0; + static constexpr int ActualPacketAccessBit = packet_traits::Vectorizable ? 
PacketAccessBit : 0; + static constexpr int FlagMask = + ActualDirectAccessBit | ActualLvaluebit | ActualPacketAccessBit | HereditaryBits | LinearAccessBit; + static constexpr int BaseFlags = int(evaluator::Flags) | int(Base::Flags); + static constexpr int Flags = BaseFlags & FlagMask; + static constexpr bool IsRowMajor = Flags & RowMajorBit; + static constexpr int RowsAtCompileTime = double_size(Base::RowsAtCompileTime, !IsRowMajor); + static constexpr int ColsAtCompileTime = double_size(Base::ColsAtCompileTime, IsRowMajor); + static constexpr int SizeAtCompileTime = size_at_compile_time(RowsAtCompileTime, ColsAtCompileTime); + static constexpr int MaxRowsAtCompileTime = double_size(Base::MaxRowsAtCompileTime, !IsRowMajor); + static constexpr int MaxColsAtCompileTime = double_size(Base::MaxColsAtCompileTime, IsRowMajor); + static constexpr int MaxSizeAtCompileTime = size_at_compile_time(MaxRowsAtCompileTime, MaxColsAtCompileTime); + static constexpr int OuterStrideAtCompileTime = double_size(outer_stride_at_compile_time::ret, true); + static constexpr int InnerStrideAtCompileTime = inner_stride_at_compile_time::ret; +}; + +template +struct evaluator> : private evaluator { + using BaseEvaluator = evaluator; + using XprType = RealView; + using ExpressionTraits = traits; + using ComplexScalar = typename ExpressionTraits::ComplexScalar; + using Scalar = typename ExpressionTraits::Scalar; + + static constexpr int Flags = ExpressionTraits::Flags; + static constexpr int CoeffReadCost = BaseEvaluator::CoeffReadCost; + static constexpr int Alignment = BaseEvaluator::Alignment; + static constexpr bool IsRowMajor = ExpressionTraits::IsRowMajor; + static constexpr bool DirectAccess = Flags & DirectAccessBit; + + using ComplexCoeffReturnType = std::conditional_t; + using CoeffReturnType = std::conditional_t; + + EIGEN_DEVICE_FUNC explicit evaluator(XprType realView) : BaseEvaluator(realView.m_xpr) {} + + template = true> + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar 
coeff(Index row, Index col) const { + Index r = IsRowMajor ? row : row / 2; + Index c = IsRowMajor ? col / 2 : col; + bool p = (IsRowMajor ? col : row) & 1; + ComplexScalar ccoeff = BaseEvaluator::coeff(r, c); + return p ? numext::imag(ccoeff) : numext::real(ccoeff); + } + template = true> + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index row, Index col) const { + Index r = IsRowMajor ? row : row / 2; + Index c = IsRowMajor ? col / 2 : col; + Index p = (IsRowMajor ? col : row) & 1; + ComplexCoeffReturnType ccoeff = BaseEvaluator::coeff(r, c); + return reinterpret_cast(ccoeff)[p]; + } + template = true> + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar coeff(Index index) const { + ComplexScalar ccoeff = BaseEvaluator::coeff(index / 2); + bool p = index & 1; + return p ? numext::imag(ccoeff) : numext::real(ccoeff); + } + template = true> + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeff(Index index) const { + ComplexCoeffReturnType ccoeff = BaseEvaluator::coeff(index / 2); + Index p = index & 1; + return reinterpret_cast(ccoeff)[p]; + } + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index row, Index col) { + Index r = IsRowMajor ? row : row / 2; + Index c = IsRowMajor ? col / 2 : col; + Index p = (IsRowMajor ? col : row) & 1; + ComplexScalar& ccoeffRef = BaseEvaluator::coeffRef(r, c); + return reinterpret_cast(ccoeffRef)[p]; + } + constexpr EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar& coeffRef(Index index) { + ComplexScalar& ccoeffRef = BaseEvaluator::coeffRef(index / 2); + Index p = index & 1; + return reinterpret_cast(ccoeffRef)[p]; + } + + // If the first index is odd (imaginary), discard the first scalar + // in 'result' and assign the missing scalar. + // This operation is safe as the real component of the first scalar must exist. 
+ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index row, Index col) const { + constexpr int RealPacketSize = unpacket_traits::size; + using ComplexPacket = typename find_packet_by_size::type; + EIGEN_STATIC_ASSERT((find_packet_by_size::value), + MISSING COMPATIBLE COMPLEX PACKET TYPE) + Index r = IsRowMajor ? row : row / 2; + Index c = IsRowMajor ? col / 2 : col; + bool p = (IsRowMajor ? col : row) & 1; + ComplexPacket cresult = BaseEvaluator::template packet(r, c); + PacketType result = preinterpret(cresult); + if (p) { + Scalar aux[RealPacketSize + 1]; + pstoreu(aux, result); + Index lastr = IsRowMajor ? row : row + RealPacketSize - 1; + Index lastc = IsRowMajor ? col + RealPacketSize - 1 : col; + aux[RealPacketSize] = coeff(lastr, lastc); + result = ploadu(aux + 1); + } + return result; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packet(Index index) const { + constexpr int RealPacketSize = unpacket_traits::size; + using ComplexPacket = typename find_packet_by_size::type; + EIGEN_STATIC_ASSERT((find_packet_by_size::value), + MISSING COMPATIBLE COMPLEX PACKET TYPE) + ComplexPacket cresult = BaseEvaluator::template packet(index / 2); + PacketType result = preinterpret(cresult); + bool p = index & 1; + if (p) { + Scalar aux[RealPacketSize + 1]; + pstoreu(aux, result); + aux[RealPacketSize] = coeff(index + RealPacketSize - 1); + result = ploadu(aux + 1); + } + return result; + } + + // The requested real packet segment forms the half-open interval [begin, end), where 'end' = 'begin' + 'count'. + // In order to access the underlying complex array, even indices must be aligned with the real components + // of the complex scalars. 'begin' and 'count' must be modified as follows: + // a) 'begin' must be rounded down to the nearest even number; and + // b) 'end' must be rounded up to the nearest even number. 
+ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index row, Index col, Index begin, Index count) const { + constexpr int RealPacketSize = unpacket_traits::size; + using ComplexPacket = typename find_packet_by_size::type; + EIGEN_STATIC_ASSERT((find_packet_by_size::value), + MISSING COMPATIBLE COMPLEX PACKET TYPE) + Index actualBegin = numext::round_down(begin, 2); + Index actualEnd = numext::round_down(begin + count + 1, 2); + Index actualCount = actualEnd - actualBegin; + Index r = IsRowMajor ? row : row / 2; + Index c = IsRowMajor ? col / 2 : col; + ComplexPacket cresult = + BaseEvaluator::template packetSegment(r, c, actualBegin / 2, actualCount / 2); + PacketType result = preinterpret(cresult); + bool p = (IsRowMajor ? col : row) & 1; + if (p) { + Scalar aux[RealPacketSize + 1] = {}; + pstoreu(aux, result); + Index lastr = IsRowMajor ? row : row + actualEnd - 1; + Index lastc = IsRowMajor ? col + actualEnd - 1 : col; + aux[actualEnd] = coeff(lastr, lastc); + result = ploadu(aux + 1); + } + return result; + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegment(Index index, Index begin, Index count) const { + constexpr int RealPacketSize = unpacket_traits::size; + using ComplexPacket = typename find_packet_by_size::type; + EIGEN_STATIC_ASSERT((find_packet_by_size::value), + MISSING COMPATIBLE COMPLEX PACKET TYPE) + Index actualBegin = numext::round_down(begin, 2); + Index actualEnd = numext::round_down(begin + count + 1, 2); + Index actualCount = actualEnd - actualBegin; + ComplexPacket cresult = + BaseEvaluator::template packetSegment(index / 2, actualBegin / 2, actualCount / 2); + PacketType result = preinterpret(cresult); + bool p = index & 1; + if (p) { + Scalar aux[RealPacketSize + 1] = {}; + pstoreu(aux, result); + aux[actualEnd] = coeff(index + actualEnd - 1); + result = ploadu(aux + 1); + } + return result; + } +}; + +} // namespace internal + +template +class RealView : public 
internal::dense_xpr_base>::type { + using ExpressionTraits = internal::traits; + EIGEN_STATIC_ASSERT(NumTraits::IsComplex, SCALAR MUST BE COMPLEX) + public: + using Scalar = typename ExpressionTraits::Scalar; + using Nested = RealView; + + EIGEN_DEVICE_FUNC explicit RealView(Xpr& xpr) : m_xpr(xpr) {} + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return Xpr::IsRowMajor ? m_xpr.rows() : 2 * m_xpr.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return Xpr::IsRowMajor ? 2 * m_xpr.cols() : m_xpr.cols(); } + EIGEN_DEVICE_FUNC constexpr Index size() const noexcept { return 2 * m_xpr.size(); } + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return m_xpr.innerStride(); } + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return 2 * m_xpr.outerStride(); } + EIGEN_DEVICE_FUNC void resize(Index rows, Index cols) { + m_xpr.resize(Xpr::IsRowMajor ? rows : rows / 2, Xpr::IsRowMajor ? cols / 2 : cols); + } + EIGEN_DEVICE_FUNC void resize(Index size) { m_xpr.resize(size / 2); } + EIGEN_DEVICE_FUNC Scalar* data() { return reinterpret_cast(m_xpr.data()); } + EIGEN_DEVICE_FUNC const Scalar* data() const { return reinterpret_cast(m_xpr.data()); } + + EIGEN_DEVICE_FUNC RealView(const RealView&) = default; + + EIGEN_DEVICE_FUNC RealView& operator=(const RealView& other); + + template + EIGEN_DEVICE_FUNC RealView& operator=(const RealView& other); + + template + EIGEN_DEVICE_FUNC RealView& operator=(const DenseBase& other); + + protected: + friend struct internal::evaluator; + Xpr& m_xpr; +}; + +template +EIGEN_DEVICE_FUNC RealView& RealView::operator=(const RealView& other) { + internal::call_assignment(*this, other); + return *this; +} + +template +template +EIGEN_DEVICE_FUNC RealView& RealView::operator=(const RealView& other) { + internal::call_assignment(*this, other); + return *this; +} + +template +template +EIGEN_DEVICE_FUNC RealView& RealView::operator=(const DenseBase& other) { + 
internal::call_assignment(*this, other.derived()); + return *this; +} + +template +EIGEN_DEVICE_FUNC typename DenseBase::RealViewReturnType DenseBase::realView() { + return RealViewReturnType(derived()); +} + +template +EIGEN_DEVICE_FUNC typename DenseBase::ConstRealViewReturnType DenseBase::realView() const { + return ConstRealViewReturnType(derived()); +} + +} // namespace Eigen + +#endif // EIGEN_REALVIEW_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Redux.h b/o-voxel/third_party/eigen/Eigen/src/Core/Redux.h new file mode 100644 index 0000000000000000000000000000000000000000..4ad0fff66dce77f295cb2188ad2443de73bcaed0 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Redux.h @@ -0,0 +1,535 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REDUX_H +#define EIGEN_REDUX_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +// TODO +// * implement other kind of vectorization +// * factorize code + +/*************************************************************************** + * Part 1 : the logic deciding a strategy for vectorization and unrolling + ***************************************************************************/ + +template +struct redux_traits { + public: + typedef typename find_best_packet::type PacketType; + enum { + PacketSize = unpacket_traits::size, + InnerMaxSize = int(Evaluator::IsRowMajor) ? Evaluator::MaxColsAtCompileTime : Evaluator::MaxRowsAtCompileTime, + OuterMaxSize = int(Evaluator::IsRowMajor) ? Evaluator::MaxRowsAtCompileTime : Evaluator::MaxColsAtCompileTime, + SliceVectorizedWork = int(InnerMaxSize) == Dynamic ? 
Dynamic + : int(OuterMaxSize) == Dynamic ? (int(InnerMaxSize) >= int(PacketSize) ? Dynamic : 0) + : (int(InnerMaxSize) / int(PacketSize)) * int(OuterMaxSize) + }; + + enum { + MayLinearize = (int(Evaluator::Flags) & LinearAccessBit), + MightVectorize = (int(Evaluator::Flags) & ActualPacketAccessBit) && (functor_traits::PacketAccess), + MayLinearVectorize = bool(MightVectorize) && bool(MayLinearize), + MaySliceVectorize = bool(MightVectorize) && (int(SliceVectorizedWork) == Dynamic || int(SliceVectorizedWork) >= 3) + }; + + public: + enum { + Traversal = int(MayLinearVectorize) ? int(LinearVectorizedTraversal) + : int(MaySliceVectorize) ? int(SliceVectorizedTraversal) + : int(MayLinearize) ? int(LinearTraversal) + : int(DefaultTraversal) + }; + + public: + enum { + Cost = Evaluator::SizeAtCompileTime == Dynamic + ? HugeCost + : int(Evaluator::SizeAtCompileTime) * int(Evaluator::CoeffReadCost) + + (Evaluator::SizeAtCompileTime - 1) * functor_traits::Cost, + UnrollingLimit = EIGEN_UNROLLING_LIMIT * (int(Traversal) == int(DefaultTraversal) ? 1 : int(PacketSize)) + }; + + public: + enum { Unrolling = Cost <= UnrollingLimit ? 
CompleteUnrolling : NoUnrolling }; + +#ifdef EIGEN_DEBUG_ASSIGN + static void debug() { + std::cerr << "Xpr: " << typeid(typename Evaluator::XprType).name() << std::endl; + std::cerr.setf(std::ios::hex, std::ios::basefield); + EIGEN_DEBUG_VAR(Evaluator::Flags) + std::cerr.unsetf(std::ios::hex); + EIGEN_DEBUG_VAR(InnerMaxSize) + EIGEN_DEBUG_VAR(OuterMaxSize) + EIGEN_DEBUG_VAR(SliceVectorizedWork) + EIGEN_DEBUG_VAR(PacketSize) + EIGEN_DEBUG_VAR(MightVectorize) + EIGEN_DEBUG_VAR(MayLinearVectorize) + EIGEN_DEBUG_VAR(MaySliceVectorize) + std::cerr << "Traversal" + << " = " << Traversal << " (" << demangle_traversal(Traversal) << ")" << std::endl; + EIGEN_DEBUG_VAR(UnrollingLimit) + std::cerr << "Unrolling" + << " = " << Unrolling << " (" << demangle_unrolling(Unrolling) << ")" << std::endl; + std::cerr << std::endl; + } +#endif +}; + +/*************************************************************************** + * Part 2 : unrollers + ***************************************************************************/ + +/*** no vectorization ***/ + +template +struct redux_novec_unroller { + static constexpr Index HalfLength = Length / 2; + + typedef typename Evaluator::Scalar Scalar; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func) { + return func(redux_novec_unroller::run(eval, func), + redux_novec_unroller::run(eval, func)); + } +}; + +template +struct redux_novec_unroller { + static constexpr Index outer = Start / Evaluator::InnerSizeAtCompileTime; + static constexpr Index inner = Start % Evaluator::InnerSizeAtCompileTime; + + typedef typename Evaluator::Scalar Scalar; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func&) { + return eval.coeffByOuterInner(outer, inner); + } +}; + +// This is actually dead code and will never be called. It is required +// to prevent false warnings regarding failed inlining though +// for 0 length run() will never be called at all. 
+template +struct redux_novec_unroller { + typedef typename Evaluator::Scalar Scalar; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); } +}; + +template +struct redux_novec_linear_unroller { + static constexpr Index HalfLength = Length / 2; + + typedef typename Evaluator::Scalar Scalar; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func) { + return func(redux_novec_linear_unroller::run(eval, func), + redux_novec_linear_unroller::run(eval, func)); + } +}; + +template +struct redux_novec_linear_unroller { + typedef typename Evaluator::Scalar Scalar; + + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func&) { + return eval.coeff(Start); + } +}; + +// This is actually dead code and will never be called. It is required +// to prevent false warnings regarding failed inlining though +// for 0 length run() will never be called at all. +template +struct redux_novec_linear_unroller { + typedef typename Evaluator::Scalar Scalar; + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator&, const Func&) { return Scalar(); } +}; + +/*** vectorization ***/ + +template +struct redux_vec_unroller { + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketType run(const Evaluator& eval, const Func& func) { + constexpr Index HalfLength = Length / 2; + + return func.packetOp( + redux_vec_unroller::template run(eval, func), + redux_vec_unroller::template run(eval, + func)); + } +}; + +template +struct redux_vec_unroller { + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketType run(const Evaluator& eval, const Func&) { + constexpr Index PacketSize = unpacket_traits::size; + constexpr Index index = Start * PacketSize; + constexpr Index outer = index / int(Evaluator::InnerSizeAtCompileTime); + constexpr Index inner = index % int(Evaluator::InnerSizeAtCompileTime); + constexpr int alignment = 
Evaluator::Alignment; + + return eval.template packetByOuterInner(outer, inner); + } +}; + +template +struct redux_vec_linear_unroller { + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketType run(const Evaluator& eval, const Func& func) { + constexpr Index HalfLength = Length / 2; + + return func.packetOp( + redux_vec_linear_unroller::template run(eval, func), + redux_vec_linear_unroller::template run( + eval, func)); + } +}; + +template +struct redux_vec_linear_unroller { + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE PacketType run(const Evaluator& eval, const Func&) { + constexpr Index PacketSize = unpacket_traits::size; + constexpr Index index = (Start * PacketSize); + constexpr int alignment = Evaluator::Alignment; + return eval.template packet(index); + } +}; + +/*************************************************************************** + * Part 3 : implementation of all cases + ***************************************************************************/ + +template ::Traversal, + int Unrolling = redux_traits::Unrolling> +struct redux_impl; + +template +struct redux_impl { + typedef typename Evaluator::Scalar Scalar; + + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func, const XprType& xpr) { + eigen_assert(xpr.rows() > 0 && xpr.cols() > 0 && "you are using an empty matrix"); + Scalar res = eval.coeffByOuterInner(0, 0); + for (Index i = 1; i < xpr.innerSize(); ++i) res = func(res, eval.coeffByOuterInner(0, i)); + for (Index i = 1; i < xpr.outerSize(); ++i) + for (Index j = 0; j < xpr.innerSize(); ++j) res = func(res, eval.coeffByOuterInner(i, j)); + return res; + } +}; + +template +struct redux_impl { + typedef typename Evaluator::Scalar Scalar; + + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func, const XprType& xpr) { + eigen_assert(xpr.size() > 0 && "you are using an empty matrix"); + Scalar res = eval.coeff(0); + for 
(Index k = 1; k < xpr.size(); ++k) res = func(res, eval.coeff(k)); + return res; + } +}; + +template +struct redux_impl + : redux_novec_unroller { + typedef redux_novec_unroller Base; + typedef typename Evaluator::Scalar Scalar; + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func, + const XprType& /*xpr*/) { + return Base::run(eval, func); + } +}; + +template +struct redux_impl + : redux_novec_linear_unroller { + typedef redux_novec_linear_unroller Base; + typedef typename Evaluator::Scalar Scalar; + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func, + const XprType& /*xpr*/) { + return Base::run(eval, func); + } +}; + +template +struct redux_impl { + typedef typename Evaluator::Scalar Scalar; + typedef typename redux_traits::PacketType PacketScalar; + + template + static Scalar run(const Evaluator& eval, const Func& func, const XprType& xpr) { + const Index size = xpr.size(); + + constexpr Index packetSize = redux_traits::PacketSize; + constexpr int packetAlignment = unpacket_traits::alignment; + constexpr int alignment0 = + (bool(Evaluator::Flags & DirectAccessBit) && bool(packet_traits::AlignedOnScalar)) + ? 
int(packetAlignment) + : int(Unaligned); + constexpr int alignment = plain_enum_max(alignment0, Evaluator::Alignment); + const Index alignedStart = internal::first_default_aligned(xpr); + const Index alignedSize2 = ((size - alignedStart) / (2 * packetSize)) * (2 * packetSize); + const Index alignedSize = ((size - alignedStart) / (packetSize)) * (packetSize); + const Index alignedEnd2 = alignedStart + alignedSize2; + const Index alignedEnd = alignedStart + alignedSize; + Scalar res; + if (alignedSize) { + PacketScalar packet_res0 = eval.template packet(alignedStart); + if (alignedSize > packetSize) // we have at least two packets to partly unroll the loop + { + PacketScalar packet_res1 = eval.template packet(alignedStart + packetSize); + for (Index index = alignedStart + 2 * packetSize; index < alignedEnd2; index += 2 * packetSize) { + packet_res0 = func.packetOp(packet_res0, eval.template packet(index)); + packet_res1 = func.packetOp(packet_res1, eval.template packet(index + packetSize)); + } + + packet_res0 = func.packetOp(packet_res0, packet_res1); + if (alignedEnd > alignedEnd2) + packet_res0 = func.packetOp(packet_res0, eval.template packet(alignedEnd2)); + } + res = func.predux(packet_res0); + + for (Index index = 0; index < alignedStart; ++index) res = func(res, eval.coeff(index)); + + for (Index index = alignedEnd; index < size; ++index) res = func(res, eval.coeff(index)); + } else // too small to vectorize anything. + // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. 
+ { + res = eval.coeff(0); + for (Index index = 1; index < size; ++index) res = func(res, eval.coeff(index)); + } + + return res; + } +}; + +// NOTE: for SliceVectorizedTraversal we simply bypass unrolling +template +struct redux_impl { + typedef typename Evaluator::Scalar Scalar; + typedef typename redux_traits::PacketType PacketType; + + template + EIGEN_DEVICE_FUNC static Scalar run(const Evaluator& eval, const Func& func, const XprType& xpr) { + eigen_assert(xpr.rows() > 0 && xpr.cols() > 0 && "you are using an empty matrix"); + constexpr Index packetSize = redux_traits::PacketSize; + const Index innerSize = xpr.innerSize(); + const Index outerSize = xpr.outerSize(); + const Index packetedInnerSize = ((innerSize) / packetSize) * packetSize; + Scalar res; + if (packetedInnerSize) { + PacketType packet_res = eval.template packet(0, 0); + for (Index j = 0; j < outerSize; ++j) + for (Index i = (j == 0 ? packetSize : 0); i < packetedInnerSize; i += Index(packetSize)) + packet_res = func.packetOp(packet_res, eval.template packetByOuterInner(j, i)); + + res = func.predux(packet_res); + for (Index j = 0; j < outerSize; ++j) + for (Index i = packetedInnerSize; i < innerSize; ++i) res = func(res, eval.coeffByOuterInner(j, i)); + } else // too small to vectorize anything. + // since this is dynamic-size hence inefficient anyway for such small sizes, don't try to optimize. 
+ { + res = redux_impl::run(eval, func, xpr); + } + + return res; + } +}; + +template +struct redux_impl { + typedef typename Evaluator::Scalar Scalar; + + typedef typename redux_traits::PacketType PacketType; + static constexpr Index PacketSize = redux_traits::PacketSize; + static constexpr Index Size = Evaluator::SizeAtCompileTime; + static constexpr Index VectorizedSize = (int(Size) / int(PacketSize)) * int(PacketSize); + + template + EIGEN_DEVICE_FUNC static EIGEN_STRONG_INLINE Scalar run(const Evaluator& eval, const Func& func, const XprType& xpr) { + EIGEN_ONLY_USED_FOR_DEBUG(xpr) + eigen_assert(xpr.rows() > 0 && xpr.cols() > 0 && "you are using an empty matrix"); + if (VectorizedSize > 0) { + Scalar res = func.predux( + redux_vec_linear_unroller::template run(eval, func)); + if (VectorizedSize != Size) + res = func( + res, redux_novec_linear_unroller::run(eval, func)); + return res; + } else { + return redux_novec_linear_unroller::run(eval, func); + } + } +}; + +// evaluator adaptor +template +class redux_evaluator : public internal::evaluator { + typedef internal::evaluator Base; + + public: + typedef XprType_ XprType; + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE explicit redux_evaluator(const XprType& xpr) : Base(xpr) {} + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + typedef typename XprType::PacketScalar PacketScalar; + + enum { + MaxRowsAtCompileTime = XprType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = XprType::MaxColsAtCompileTime, + // TODO we should not remove DirectAccessBit and rather find an elegant way to query the alignment offset at runtime + // from the evaluator + Flags = Base::Flags & ~DirectAccessBit, + IsRowMajor = XprType::IsRowMajor, + SizeAtCompileTime = XprType::SizeAtCompileTime, + InnerSizeAtCompileTime = XprType::InnerSizeAtCompileTime + }; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE CoeffReturnType coeffByOuterInner(Index outer, Index inner) const { + return 
Base::coeff(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetByOuterInner(Index outer, Index inner) const { + return Base::template packet(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE PacketType packetSegmentByOuterInner(Index outer, Index inner, Index begin, + Index count) const { + return Base::template packetSegment(IsRowMajor ? outer : inner, IsRowMajor ? inner : outer, + begin, count); + } +}; + +} // end namespace internal + +/*************************************************************************** + * Part 4 : public API + ***************************************************************************/ + +/** \returns the result of a full redux operation on the whole matrix or vector using \a func + * + * The template parameter \a BinaryOp is the type of the functor \a func which must be + * an associative operator. Both current C++98 and C++11 functor styles are handled. + * + * \warning the matrix must be not empty, otherwise an assertion is triggered. + * + * \sa DenseBase::sum(), DenseBase::minCoeff(), DenseBase::maxCoeff(), MatrixBase::colwise(), MatrixBase::rowwise() + */ +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::redux( + const Func& func) const { + eigen_assert(this->rows() > 0 && this->cols() > 0 && "you are using an empty matrix"); + + typedef typename internal::redux_evaluator ThisEvaluator; + ThisEvaluator thisEval(derived()); + + // The initial expression is passed to the reducer as an additional argument instead of + // passing it as a member of redux_evaluator to help + return internal::redux_impl::run(thisEval, func, derived()); +} + +/** \returns the minimum of all coefficients of \c *this. 
+ * In case \c *this contains NaN, NaNPropagation determines the behavior: + * NaNPropagation == PropagateFast : undefined + * NaNPropagation == PropagateNaN : result is NaN + * NaNPropagation == PropagateNumbers : result is minimum of elements that are not NaN + * \warning the matrix must be not empty, otherwise an assertion is triggered. + */ +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::minCoeff() const { + return derived().redux(Eigen::internal::scalar_min_op()); +} + +/** \returns the maximum of all coefficients of \c *this. + * In case \c *this contains NaN, NaNPropagation determines the behavior: + * NaNPropagation == PropagateFast : undefined + * NaNPropagation == PropagateNaN : result is NaN + * NaNPropagation == PropagateNumbers : result is maximum of elements that are not NaN + * \warning the matrix must be not empty, otherwise an assertion is triggered. + */ +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::maxCoeff() const { + return derived().redux(Eigen::internal::scalar_max_op()); +} + +/** \returns the sum of all coefficients of \c *this + * + * If \c *this is empty, then the value 0 is returned. 
+ * + * \sa trace(), prod(), mean() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::sum() const { + if (SizeAtCompileTime == 0 || (SizeAtCompileTime == Dynamic && size() == 0)) return Scalar(0); + return derived().redux(Eigen::internal::scalar_sum_op()); +} + +/** \returns the mean of all coefficients of *this + * + * \sa trace(), prod(), sum() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::mean() const { +#ifdef __INTEL_COMPILER +#pragma warning push +#pragma warning(disable : 2259) +#endif + return Scalar(derived().redux(Eigen::internal::scalar_sum_op())) / Scalar(this->size()); +#ifdef __INTEL_COMPILER +#pragma warning pop +#endif +} + +/** \returns the product of all coefficients of *this + * + * Example: \include MatrixBase_prod.cpp + * Output: \verbinclude MatrixBase_prod.out + * + * \sa sum(), mean(), trace() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar DenseBase::prod() const { + if (SizeAtCompileTime == 0 || (SizeAtCompileTime == Dynamic && size() == 0)) return Scalar(1); + return derived().redux(Eigen::internal::scalar_product_op()); +} + +/** \returns the trace of \c *this, i.e. the sum of the coefficients on the main diagonal. + * + * \c *this can be any matrix, not necessarily square. + * + * \sa diagonal(), sum() + */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename internal::traits::Scalar MatrixBase::trace() const { + return derived().diagonal().sum(); +} + +} // end namespace Eigen + +#endif // EIGEN_REDUX_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Ref.h b/o-voxel/third_party/eigen/Eigen/src/Core/Ref.h new file mode 100644 index 0000000000000000000000000000000000000000..883a3cd3c2aae6960f9f270c65ac3fedf10d9a74 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Ref.h @@ -0,0 +1,383 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2012 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REF_H +#define EIGEN_REF_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template +struct traits > + : public traits > { + typedef PlainObjectType_ PlainObjectType; + typedef StrideType_ StrideType; + enum { + Options = Options_, + Flags = traits >::Flags | NestByRefBit, + Alignment = traits >::Alignment, + InnerStrideAtCompileTime = traits >::InnerStrideAtCompileTime, + OuterStrideAtCompileTime = traits >::OuterStrideAtCompileTime + }; + + template + struct match { + enum { + IsVectorAtCompileTime = PlainObjectType::IsVectorAtCompileTime || Derived::IsVectorAtCompileTime, + HasDirectAccess = internal::has_direct_access::ret, + StorageOrderMatch = + IsVectorAtCompileTime || ((PlainObjectType::Flags & RowMajorBit) == (Derived::Flags & RowMajorBit)), + InnerStrideMatch = int(InnerStrideAtCompileTime) == int(Dynamic) || + int(InnerStrideAtCompileTime) == int(Derived::InnerStrideAtCompileTime) || + (int(InnerStrideAtCompileTime) == 0 && int(Derived::InnerStrideAtCompileTime) == 1), + OuterStrideMatch = IsVectorAtCompileTime || int(OuterStrideAtCompileTime) == int(Dynamic) || + int(OuterStrideAtCompileTime) == int(Derived::OuterStrideAtCompileTime), + // NOTE, this indirection of evaluator::Alignment is needed + // to workaround a very strange bug in MSVC related to the instantiation + // of has_*ary_operator in evaluator. + // This line is surprisingly very sensitive. For instance, simply adding parenthesis + // as "DerivedAlignment = (int(evaluator::Alignment))," will make MSVC fail... 
+ DerivedAlignment = int(evaluator::Alignment), + AlignmentMatch = (int(traits::Alignment) == int(Unaligned)) || + (DerivedAlignment >= int(Alignment)), // FIXME the first condition is not very clear, it should + // be replaced by the required alignment + ScalarTypeMatch = internal::is_same::value, + MatchAtCompileTime = HasDirectAccess && StorageOrderMatch && InnerStrideMatch && OuterStrideMatch && + AlignmentMatch && ScalarTypeMatch + }; + typedef std::conditional_t type; + }; +}; + +template +struct traits > : public traits {}; + +} // namespace internal + +template +class RefBase : public MapBase { + typedef typename internal::traits::PlainObjectType PlainObjectType; + typedef typename internal::traits::StrideType StrideType; + + public: + typedef MapBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(RefBase) + + EIGEN_DEVICE_FUNC constexpr Index innerStride() const { + return StrideType::InnerStrideAtCompileTime != 0 ? m_stride.inner() : 1; + } + + EIGEN_DEVICE_FUNC constexpr Index outerStride() const { + return StrideType::OuterStrideAtCompileTime != 0 ? m_stride.outer() + : IsVectorAtCompileTime ? this->size() + : int(Flags) & RowMajorBit ? this->cols() + : this->rows(); + } + + EIGEN_DEVICE_FUNC RefBase() + : Base(0, RowsAtCompileTime == Dynamic ? 0 : RowsAtCompileTime, + ColsAtCompileTime == Dynamic ? 0 : ColsAtCompileTime), + // Stride<> does not allow default ctor for Dynamic strides, so let' initialize it with dummy values: + m_stride(StrideType::OuterStrideAtCompileTime == Dynamic ? 0 : StrideType::OuterStrideAtCompileTime, + StrideType::InnerStrideAtCompileTime == Dynamic ? 0 : StrideType::InnerStrideAtCompileTime) {} + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(RefBase) + + protected: + typedef Stride StrideBase; + + // Resolves inner stride if default 0. + static EIGEN_DEVICE_FUNC constexpr Index resolveInnerStride(Index inner) { return inner == 0 ? 1 : inner; } + + // Resolves outer stride if default 0. 
+ static EIGEN_DEVICE_FUNC constexpr Index resolveOuterStride(Index inner, Index outer, Index rows, Index cols, + bool isVectorAtCompileTime, bool isRowMajor) { + return outer == 0 ? isVectorAtCompileTime ? inner * rows * cols : isRowMajor ? inner * cols : inner * rows : outer; + } + + // Returns true if construction is valid, false if there is a stride mismatch, + // and fails if there is a size mismatch. + template + EIGEN_DEVICE_FUNC bool construct(Expression& expr) { + // Check matrix sizes. If this is a compile-time vector, we do allow + // implicitly transposing. + EIGEN_STATIC_ASSERT(EIGEN_PREDICATE_SAME_MATRIX_SIZE(PlainObjectType, Expression) + // If it is a vector, the transpose sizes might match. + || (PlainObjectType::IsVectorAtCompileTime && + ((int(PlainObjectType::RowsAtCompileTime) == Eigen::Dynamic || + int(Expression::ColsAtCompileTime) == Eigen::Dynamic || + int(PlainObjectType::RowsAtCompileTime) == int(Expression::ColsAtCompileTime)) && + (int(PlainObjectType::ColsAtCompileTime) == Eigen::Dynamic || + int(Expression::RowsAtCompileTime) == Eigen::Dynamic || + int(PlainObjectType::ColsAtCompileTime) == int(Expression::RowsAtCompileTime)))), + YOU_MIXED_MATRICES_OF_DIFFERENT_SIZES) + + // Determine runtime rows and columns. + Index rows = expr.rows(); + Index cols = expr.cols(); + if (PlainObjectType::RowsAtCompileTime == 1) { + eigen_assert(expr.rows() == 1 || expr.cols() == 1); + rows = 1; + cols = expr.size(); + } else if (PlainObjectType::ColsAtCompileTime == 1) { + eigen_assert(expr.rows() == 1 || expr.cols() == 1); + rows = expr.size(); + cols = 1; + } + // Verify that the sizes are valid. + eigen_assert((PlainObjectType::RowsAtCompileTime == Dynamic) || (PlainObjectType::RowsAtCompileTime == rows)); + eigen_assert((PlainObjectType::ColsAtCompileTime == Dynamic) || (PlainObjectType::ColsAtCompileTime == cols)); + + // If this is a vector, we might be transposing, which means that stride should swap. 
+ const bool transpose = PlainObjectType::IsVectorAtCompileTime && (rows != expr.rows()); + // If the storage format differs, we also need to swap the stride. + const bool row_major = ((PlainObjectType::Flags)&RowMajorBit) != 0; + const bool expr_row_major = (Expression::Flags & RowMajorBit) != 0; + const bool storage_differs = (row_major != expr_row_major); + + const bool swap_stride = (transpose != storage_differs); + + // Determine expr's actual strides, resolving any defaults if zero. + const Index expr_inner_actual = resolveInnerStride(expr.innerStride()); + const Index expr_outer_actual = resolveOuterStride(expr_inner_actual, expr.outerStride(), expr.rows(), expr.cols(), + Expression::IsVectorAtCompileTime != 0, expr_row_major); + + // If this is a column-major row vector or row-major column vector, the inner-stride + // is arbitrary, so set it to either the compile-time inner stride or 1. + const bool row_vector = (rows == 1); + const bool col_vector = (cols == 1); + const Index inner_stride = + ((!row_major && row_vector) || (row_major && col_vector)) + ? (StrideType::InnerStrideAtCompileTime > 0 ? Index(StrideType::InnerStrideAtCompileTime) : 1) + : swap_stride ? expr_outer_actual + : expr_inner_actual; + + // If this is a column-major column vector or row-major row vector, the outer-stride + // is arbitrary, so set it to either the compile-time outer stride or vector size. + const Index outer_stride = + ((!row_major && col_vector) || (row_major && row_vector)) + ? (StrideType::OuterStrideAtCompileTime > 0 ? Index(StrideType::OuterStrideAtCompileTime) + : rows * cols * inner_stride) + : swap_stride ? expr_inner_actual + : expr_outer_actual; + + // Check if given inner/outer strides are compatible with compile-time strides. 
+ const bool inner_valid = (StrideType::InnerStrideAtCompileTime == Dynamic) || + (resolveInnerStride(Index(StrideType::InnerStrideAtCompileTime)) == inner_stride); + if (!inner_valid) { + return false; + } + + const bool outer_valid = + (StrideType::OuterStrideAtCompileTime == Dynamic) || + (resolveOuterStride(inner_stride, Index(StrideType::OuterStrideAtCompileTime), rows, cols, + PlainObjectType::IsVectorAtCompileTime != 0, row_major) == outer_stride); + if (!outer_valid) { + return false; + } + + internal::construct_at(this, expr.data(), rows, cols); + internal::construct_at(&m_stride, (StrideType::OuterStrideAtCompileTime == 0) ? 0 : outer_stride, + (StrideType::InnerStrideAtCompileTime == 0) ? 0 : inner_stride); + return true; + } + + StrideBase m_stride; +}; + +/** \class Ref + * \ingroup Core_Module + * + * \brief A matrix or vector expression mapping an existing expression + * + * \tparam PlainObjectType the equivalent matrix type of the mapped data + * \tparam Options specifies the pointer alignment in bytes. It can be: \c #Aligned128, , \c #Aligned64, \c #Aligned32, + * \c #Aligned16, \c #Aligned8 or \c #Unaligned. The default is \c #Unaligned. \tparam StrideType optionally specifies + * strides. By default, Ref implies a contiguous storage along the inner dimension (inner stride==1), but accepts a + * variable outer stride (leading dimension). This can be overridden by specifying strides. The type passed here must be + * a specialization of the Stride template, see examples below. + * + * This class provides a way to write non-template functions taking Eigen objects as parameters while limiting the + * number of copies. 
A Ref<> object can represent either a const expression or an l-value: \code + * // in-out argument: + * void foo1(Ref<VectorXf> x); + * + * // read-only const argument: + * void foo2(const Ref<const VectorXf>& x); + * \endcode + * + * In the in-out case, the input argument must satisfy the constraints of the actual Ref<> type, otherwise a compilation + * issue will be triggered. By default, a Ref<VectorXf> can reference any dense vector expression of float having a + * contiguous memory layout. Likewise, a Ref<MatrixXf> can reference any column-major dense matrix expression of float + * whose column's elements are contiguously stored with the possibility to have a constant space in-between each column, + * i.e. the inner stride must be equal to 1, but the outer stride (or leading dimension) can be greater than the number + * of rows. + * + * In the const case, if the input expression does not match the above requirement, then it is evaluated into a + * temporary before being passed to the function. Here are some examples: \code MatrixXf A; VectorXf a; foo1(a.head()); + * // OK foo1(A.col()); // OK foo1(A.row()); // Compilation error because here innerstride!=1 + * foo2(A.row()); // Compilation error because A.row() is a 1xN object while foo2 is expecting a Nx1 object + * foo2(A.row().transpose()); // The row is copied into a contiguous temporary + * foo2(2*a); // The expression is evaluated into a temporary + * foo2(A.col().segment(2,4)); // No temporary + * \endcode + * + * The range of inputs that can be referenced without temporary can be enlarged using the last two template parameters. + * Here is an example accepting an innerstride!=1: + * \code + * // in-out argument: + * void foo3(Ref<VectorXf,0,InnerStride<> > x); + * foo3(A.row()); // OK + * \endcode + * The downside here is that the function foo3 might be significantly slower than foo1 because it won't be able to + * exploit vectorization, and will involve more expensive address computations even if the input is contiguously stored + * in memory.
To overcome this issue, one might propose to overload internally calling a template function, e.g.: \code + * // in the .h: + * void foo(const Ref& A); + * void foo(const Ref >& A); + * + * // in the .cpp: + * template void foo_impl(const TypeOfA& A) { + * ... // crazy code goes here + * } + * void foo(const Ref& A) { foo_impl(A); } + * void foo(const Ref >& A) { foo_impl(A); } + * \endcode + * + * See also the following stackoverflow questions for further references: + * - Correct usage of the + * Eigen::Ref<> class + * + * \sa PlainObjectBase::Map(), \ref TopicStorageOrders + */ +template +class Ref : public RefBase > { + private: + typedef internal::traits Traits; + template + EIGEN_DEVICE_FUNC inline Ref( + const PlainObjectBase& expr, + std::enable_if_t::MatchAtCompileTime), Derived>* = 0); + + public: + typedef RefBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Ref) + +#ifndef EIGEN_PARSED_BY_DOXYGEN + template + EIGEN_DEVICE_FUNC inline Ref( + PlainObjectBase& expr, + std::enable_if_t::MatchAtCompileTime), Derived>* = 0) { + EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + // Construction must pass since we will not create temporary storage in the non-const case. 
+ const bool success = Base::construct(expr.derived()); + EIGEN_UNUSED_VARIABLE(success) + eigen_assert(success); + } + template + EIGEN_DEVICE_FUNC inline Ref( + const DenseBase& expr, + std::enable_if_t::MatchAtCompileTime), Derived>* = 0) +#else + /** Implicit constructor from any dense expression */ + template + inline Ref(DenseBase& expr) +#endif + { + EIGEN_STATIC_ASSERT(bool(internal::is_lvalue::value), THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + EIGEN_STATIC_ASSERT(bool(Traits::template match::MatchAtCompileTime), STORAGE_LAYOUT_DOES_NOT_MATCH); + EIGEN_STATIC_ASSERT(!Derived::IsPlainObjectBase, THIS_EXPRESSION_IS_NOT_A_LVALUE__IT_IS_READ_ONLY); + // Construction must pass since we will not create temporary storage in the non-const case. + const bool success = Base::construct(expr.const_cast_derived()); + EIGEN_UNUSED_VARIABLE(success) + eigen_assert(success); + } + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Ref) +}; + +// this is the const ref version +template +class Ref + : public RefBase > { + typedef internal::traits Traits; + + static constexpr bool may_map_m_object_successfully = + (static_cast(StrideType::InnerStrideAtCompileTime) == 0 || + static_cast(StrideType::InnerStrideAtCompileTime) == 1 || + static_cast(StrideType::InnerStrideAtCompileTime) == Dynamic) && + (TPlainObjectType::IsVectorAtCompileTime || static_cast(StrideType::OuterStrideAtCompileTime) == 0 || + static_cast(StrideType::OuterStrideAtCompileTime) == Dynamic || + static_cast(StrideType::OuterStrideAtCompileTime) == + static_cast(TPlainObjectType::InnerSizeAtCompileTime) || + static_cast(TPlainObjectType::InnerSizeAtCompileTime) == Dynamic); + + public: + typedef RefBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Ref) + + template + EIGEN_DEVICE_FUNC inline Ref(const DenseBase& expr, + std::enable_if_t::ScalarTypeMatch), Derived>* = 0) { + // std::cout << match_helper::HasDirectAccess << "," << match_helper::OuterStrideMatch << "," + // << match_helper::InnerStrideMatch << "\n"; 
std::cout << int(StrideType::OuterStrideAtCompileTime) + // << " - " << int(Derived::OuterStrideAtCompileTime) << "\n"; std::cout << + // int(StrideType::InnerStrideAtCompileTime) << " - " << int(Derived::InnerStrideAtCompileTime) << "\n"; + EIGEN_STATIC_ASSERT(Traits::template match::type::value || may_map_m_object_successfully, + STORAGE_LAYOUT_DOES_NOT_MATCH); + construct(expr.derived(), typename Traits::template match::type()); + } + + EIGEN_DEVICE_FUNC inline Ref(const Ref& other) : Base(other) { + // copy constructor shall not copy the m_object, to avoid unnecessary malloc and copy + } + + EIGEN_DEVICE_FUNC inline Ref(Ref&& other) { + if (other.data() == other.m_object.data()) { + m_object = std::move(other.m_object); + Base::construct(m_object); + } else + Base::construct(other); + } + + template + EIGEN_DEVICE_FUNC inline Ref(const RefBase& other) { + EIGEN_STATIC_ASSERT(Traits::template match::type::value || may_map_m_object_successfully, + STORAGE_LAYOUT_DOES_NOT_MATCH); + construct(other.derived(), typename Traits::template match::type()); + } + + protected: + template + EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::true_type) { + // Check if we can use the underlying expr's storage directly, otherwise call the copy version. 
+ if (!Base::construct(expr)) { + construct(expr, internal::false_type()); + } + } + + template + EIGEN_DEVICE_FUNC void construct(const Expression& expr, internal::false_type) { + internal::call_assignment_no_alias(m_object, expr, internal::assign_op()); + const bool success = Base::construct(m_object); + EIGEN_ONLY_USED_FOR_DEBUG(success) + eigen_assert(success); + } + + protected: + TPlainObjectType m_object; +}; + +} // end namespace Eigen + +#endif // EIGEN_REF_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Replicate.h b/o-voxel/third_party/eigen/Eigen/src/Core/Replicate.h new file mode 100644 index 0000000000000000000000000000000000000000..264b7c1ad344884bdc3731be1446c35168ec9a95 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Replicate.h @@ -0,0 +1,130 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_REPLICATE_H +#define EIGEN_REPLICATE_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { +template +struct traits > : traits { + typedef typename MatrixType::Scalar Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::XprKind XprKind; + typedef typename ref_selector::type MatrixTypeNested; + typedef std::remove_reference_t MatrixTypeNested_; + enum { + RowsAtCompileTime = RowFactor == Dynamic || int(MatrixType::RowsAtCompileTime) == Dynamic + ? Dynamic + : RowFactor * MatrixType::RowsAtCompileTime, + ColsAtCompileTime = ColFactor == Dynamic || int(MatrixType::ColsAtCompileTime) == Dynamic + ? Dynamic + : ColFactor * MatrixType::ColsAtCompileTime, + // FIXME we don't propagate the max sizes !!! 
+ MaxRowsAtCompileTime = RowsAtCompileTime, + MaxColsAtCompileTime = ColsAtCompileTime, + IsRowMajor = MaxRowsAtCompileTime == 1 && MaxColsAtCompileTime != 1 ? 1 + : MaxColsAtCompileTime == 1 && MaxRowsAtCompileTime != 1 ? 0 + : (MatrixType::Flags & RowMajorBit) ? 1 + : 0, + + // FIXME enable DirectAccess with negative strides? + Flags = IsRowMajor ? RowMajorBit : 0 + }; +}; +} // namespace internal + +/** + * \class Replicate + * \ingroup Core_Module + * + * \brief Expression of the multiple replication of a matrix or vector + * + * \tparam MatrixType the type of the object we are replicating + * \tparam RowFactor number of repetitions at compile time along the vertical direction, can be Dynamic. + * \tparam ColFactor number of repetitions at compile time along the horizontal direction, can be Dynamic. + * + * This class represents an expression of the multiple replication of a matrix or vector. + * It is the return type of DenseBase::replicate() and most of the time + * this is the only way it is used. 
+ * + * \sa DenseBase::replicate() + */ +template +class Replicate : public internal::dense_xpr_base >::type { + typedef typename internal::traits::MatrixTypeNested MatrixTypeNested; + typedef typename internal::traits::MatrixTypeNested_ MatrixTypeNested_; + + public: + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Replicate) + typedef internal::remove_all_t NestedExpression; + + template + EIGEN_DEVICE_FUNC inline explicit Replicate(const OriginalMatrixType& matrix) + : m_matrix(matrix), m_rowFactor(RowFactor), m_colFactor(ColFactor) { + EIGEN_STATIC_ASSERT((internal::is_same, OriginalMatrixType>::value), + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) + eigen_assert(RowFactor != Dynamic && ColFactor != Dynamic); + } + + template + EIGEN_DEVICE_FUNC inline Replicate(const OriginalMatrixType& matrix, Index rowFactor, Index colFactor) + : m_matrix(matrix), m_rowFactor(rowFactor), m_colFactor(colFactor) { + EIGEN_STATIC_ASSERT((internal::is_same, OriginalMatrixType>::value), + THE_MATRIX_OR_EXPRESSION_THAT_YOU_PASSED_DOES_NOT_HAVE_THE_EXPECTED_TYPE) + } + + EIGEN_DEVICE_FUNC constexpr Index rows() const { return m_matrix.rows() * m_rowFactor.value(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const { return m_matrix.cols() * m_colFactor.value(); } + + EIGEN_DEVICE_FUNC const MatrixTypeNested_& nestedExpression() const { return m_matrix; } + + protected: + MatrixTypeNested m_matrix; + const internal::variable_if_dynamic m_rowFactor; + const internal::variable_if_dynamic m_colFactor; +}; + +/** + * \return an expression of the replication of \c *this + * + * Example: \include MatrixBase_replicate.cpp + * Output: \verbinclude MatrixBase_replicate.out + * + * \sa VectorwiseOp::replicate(), DenseBase::replicate(Index,Index), class Replicate + */ +template +template +EIGEN_DEVICE_FUNC const Replicate DenseBase::replicate() const { + return Replicate(derived()); +} + +/** + * \return an expression of the 
replication of each column (or row) of \c *this + * + * Example: \include DirectionWise_replicate_int.cpp + * Output: \verbinclude DirectionWise_replicate_int.out + * + * \sa VectorwiseOp::replicate(), DenseBase::replicate(), class Replicate + */ +template +EIGEN_DEVICE_FUNC const typename VectorwiseOp::ReplicateReturnType +VectorwiseOp::replicate(Index factor) const { + return typename VectorwiseOp::ReplicateReturnType( + _expression(), Direction == Vertical ? factor : 1, Direction == Horizontal ? factor : 1); +} + +} // end namespace Eigen + +#endif // EIGEN_REPLICATE_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Reshaped.h b/o-voxel/third_party/eigen/Eigen/src/Core/Reshaped.h new file mode 100644 index 0000000000000000000000000000000000000000..6694e6af83d4cff9ff51d3faf7b90905393d1363 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Reshaped.h @@ -0,0 +1,398 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2017 Gael Guennebaud +// Copyright (C) 2014 yoco +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_RESHAPED_H +#define EIGEN_RESHAPED_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +/** \class Reshaped + * \ingroup Core_Module + * + * \brief Expression of a fixed-size or dynamic-size reshape + * + * \tparam XprType the type of the expression in which we are taking a reshape + * \tparam Rows the number of rows of the reshape we are taking at compile time (optional) + * \tparam Cols the number of columns of the reshape we are taking at compile time (optional) + * \tparam Order can be ColMajor or RowMajor, default is ColMajor. + * + * This class represents an expression of either a fixed-size or dynamic-size reshape. 
+ * It is the return type of DenseBase::reshaped(NRowsType,NColsType) and + * most of the time this is the only way it is used. + * + * If you want to directly manipulate reshaped expressions, + * for instance if you want to write a function returning such an expression, + * it is advised to use the \em auto keyword for such use cases. + * + * Here is an example illustrating the dynamic case: + * \include class_Reshaped.cpp + * Output: \verbinclude class_Reshaped.out + * + * Here is an example illustrating the fixed-size case: + * \include class_FixedReshaped.cpp + * Output: \verbinclude class_FixedReshaped.out + * + * \sa DenseBase::reshaped(NRowsType,NColsType) + */ + +namespace internal { + +template +struct traits > : traits { + typedef typename traits::Scalar Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::XprKind XprKind; + enum { + MatrixRows = traits::RowsAtCompileTime, + MatrixCols = traits::ColsAtCompileTime, + RowsAtCompileTime = Rows, + ColsAtCompileTime = Cols, + MaxRowsAtCompileTime = Rows, + MaxColsAtCompileTime = Cols, + XpxStorageOrder = ((int(traits::Flags) & RowMajorBit) == RowMajorBit) ? RowMajor : ColMajor, + ReshapedStorageOrder = (RowsAtCompileTime == 1 && ColsAtCompileTime != 1) ? RowMajor + : (ColsAtCompileTime == 1 && RowsAtCompileTime != 1) ? ColMajor + : XpxStorageOrder, + HasSameStorageOrderAsXprType = (ReshapedStorageOrder == XpxStorageOrder), + InnerSize = (ReshapedStorageOrder == int(RowMajor)) ? int(ColsAtCompileTime) : int(RowsAtCompileTime), + InnerStrideAtCompileTime = HasSameStorageOrderAsXprType ? int(inner_stride_at_compile_time::ret) : Dynamic, + OuterStrideAtCompileTime = Dynamic, + + HasDirectAccess = internal::has_direct_access::ret && (Order == int(XpxStorageOrder)) && + ((evaluator::Flags & LinearAccessBit) == LinearAccessBit), + + MaskPacketAccessBit = + (InnerSize == Dynamic || (InnerSize % packet_traits::size) == 0) && (InnerStrideAtCompileTime == 1) + ? 
PacketAccessBit + : 0, + // MaskAlignedBit = ((OuterStrideAtCompileTime!=Dynamic) && (((OuterStrideAtCompileTime * int(sizeof(Scalar))) % 16) + // == 0)) ? AlignedBit : 0, + FlagsLinearAccessBit = (RowsAtCompileTime == 1 || ColsAtCompileTime == 1) ? LinearAccessBit : 0, + FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, + FlagsRowMajorBit = (ReshapedStorageOrder == int(RowMajor)) ? RowMajorBit : 0, + FlagsDirectAccessBit = HasDirectAccess ? DirectAccessBit : 0, + Flags0 = traits::Flags & ((HereditaryBits & ~RowMajorBit) | MaskPacketAccessBit), + + Flags = (Flags0 | FlagsLinearAccessBit | FlagsLvalueBit | FlagsRowMajorBit | FlagsDirectAccessBit) + }; +}; + +template +class ReshapedImpl_dense; + +} // end namespace internal + +template +class ReshapedImpl; + +template +class Reshaped : public ReshapedImpl::StorageKind> { + typedef ReshapedImpl::StorageKind> Impl; + + public: + // typedef typename Impl::Base Base; + typedef Impl Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(Reshaped) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reshaped) + + /** Fixed-size constructor + */ + EIGEN_DEVICE_FUNC inline Reshaped(XprType& xpr) : Impl(xpr) { + EIGEN_STATIC_ASSERT(RowsAtCompileTime != Dynamic && ColsAtCompileTime != Dynamic, + THIS_METHOD_IS_ONLY_FOR_FIXED_SIZE) + eigen_assert(Rows * Cols == xpr.rows() * xpr.cols()); + } + + /** Dynamic-size constructor + */ + EIGEN_DEVICE_FUNC inline Reshaped(XprType& xpr, Index reshapeRows, Index reshapeCols) + : Impl(xpr, reshapeRows, reshapeCols) { + eigen_assert((RowsAtCompileTime == Dynamic || RowsAtCompileTime == reshapeRows) && + (ColsAtCompileTime == Dynamic || ColsAtCompileTime == reshapeCols)); + eigen_assert(reshapeRows * reshapeCols == xpr.rows() * xpr.cols()); + } +}; + +// The generic default implementation for dense reshape simply forward to the internal::ReshapedImpl_dense +// that must be specialized for direct and non-direct access... 
+template +class ReshapedImpl + : public internal::ReshapedImpl_dense >::HasDirectAccess> { + typedef internal::ReshapedImpl_dense >::HasDirectAccess> + Impl; + + public: + typedef Impl Base; + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ReshapedImpl) + EIGEN_DEVICE_FUNC inline ReshapedImpl(XprType& xpr) : Impl(xpr) {} + EIGEN_DEVICE_FUNC inline ReshapedImpl(XprType& xpr, Index reshapeRows, Index reshapeCols) + : Impl(xpr, reshapeRows, reshapeCols) {} +}; + +namespace internal { + +/** \internal Internal implementation of dense Reshaped in the general case. */ +template +class ReshapedImpl_dense + : public internal::dense_xpr_base >::type { + typedef Reshaped ReshapedType; + + public: + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(ReshapedType) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ReshapedImpl_dense) + + typedef typename internal::ref_selector::non_const_type MatrixTypeNested; + typedef internal::remove_all_t NestedExpression; + + class InnerIterator; + + /** Fixed-size constructor + */ + EIGEN_DEVICE_FUNC inline ReshapedImpl_dense(XprType& xpr) : m_xpr(xpr), m_rows(Rows), m_cols(Cols) {} + + /** Dynamic-size constructor + */ + EIGEN_DEVICE_FUNC inline ReshapedImpl_dense(XprType& xpr, Index nRows, Index nCols) + : m_xpr(xpr), m_rows(nRows), m_cols(nCols) {} + + EIGEN_DEVICE_FUNC Index rows() const { return m_rows; } + EIGEN_DEVICE_FUNC Index cols() const { return m_cols; } + +#ifdef EIGEN_PARSED_BY_DOXYGEN + /** \sa MapBase::data() */ + EIGEN_DEVICE_FUNC constexpr const Scalar* data() const; + EIGEN_DEVICE_FUNC inline Index innerStride() const; + EIGEN_DEVICE_FUNC inline Index outerStride() const; +#endif + + /** \returns the nested expression */ + EIGEN_DEVICE_FUNC const internal::remove_all_t& nestedExpression() const { return m_xpr; } + + /** \returns the nested expression */ + EIGEN_DEVICE_FUNC std::remove_reference_t& nestedExpression() { return m_xpr; } + + protected: + MatrixTypeNested m_xpr; + const 
internal::variable_if_dynamic m_rows; + const internal::variable_if_dynamic m_cols; +}; + +/** \internal Internal implementation of dense Reshaped in the direct access case. */ +template +class ReshapedImpl_dense : public MapBase > { + typedef Reshaped ReshapedType; + typedef typename internal::ref_selector::non_const_type XprTypeNested; + + public: + typedef MapBase Base; + EIGEN_DENSE_PUBLIC_INTERFACE(ReshapedType) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(ReshapedImpl_dense) + + /** Fixed-size constructor + */ + EIGEN_DEVICE_FUNC inline ReshapedImpl_dense(XprType& xpr) : Base(xpr.data()), m_xpr(xpr) {} + + /** Dynamic-size constructor + */ + EIGEN_DEVICE_FUNC inline ReshapedImpl_dense(XprType& xpr, Index nRows, Index nCols) + : Base(xpr.data(), nRows, nCols), m_xpr(xpr) {} + + EIGEN_DEVICE_FUNC const internal::remove_all_t& nestedExpression() const { return m_xpr; } + + EIGEN_DEVICE_FUNC XprType& nestedExpression() { return m_xpr; } + + /** \sa MapBase::innerStride() */ + EIGEN_DEVICE_FUNC constexpr Index innerStride() const { return m_xpr.innerStride(); } + + /** \sa MapBase::outerStride() */ + EIGEN_DEVICE_FUNC constexpr Index outerStride() const { + return (((Flags & RowMajorBit) == RowMajorBit) ? 
this->cols() : this->rows()) * m_xpr.innerStride(); + } + + protected: + XprTypeNested m_xpr; +}; + +// Evaluators +template +struct reshaped_evaluator; + +template +struct evaluator > + : reshaped_evaluator >::HasDirectAccess> { + typedef Reshaped XprType; + typedef typename XprType::Scalar Scalar; + // TODO: should check for smaller packet types + typedef typename packet_traits::type PacketScalar; + + enum { + CoeffReadCost = evaluator::CoeffReadCost, + HasDirectAccess = traits::HasDirectAccess, + + // RowsAtCompileTime = traits::RowsAtCompileTime, + // ColsAtCompileTime = traits::ColsAtCompileTime, + // MaxRowsAtCompileTime = traits::MaxRowsAtCompileTime, + // MaxColsAtCompileTime = traits::MaxColsAtCompileTime, + // + // InnerStrideAtCompileTime = traits::HasSameStorageOrderAsXprType + // ? int(inner_stride_at_compile_time::ret) + // : Dynamic, + // OuterStrideAtCompileTime = Dynamic, + + FlagsLinearAccessBit = + (traits::RowsAtCompileTime == 1 || traits::ColsAtCompileTime == 1 || HasDirectAccess) + ? LinearAccessBit + : 0, + FlagsRowMajorBit = (traits::ReshapedStorageOrder == int(RowMajor)) ? RowMajorBit : 0, + FlagsDirectAccessBit = HasDirectAccess ? 
DirectAccessBit : 0, + Flags0 = evaluator::Flags & (HereditaryBits & ~RowMajorBit), + Flags = Flags0 | FlagsLinearAccessBit | FlagsRowMajorBit | FlagsDirectAccessBit, + + PacketAlignment = unpacket_traits::alignment, + Alignment = evaluator::Alignment + }; + typedef reshaped_evaluator reshaped_evaluator_type; + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : reshaped_evaluator_type(xpr) { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } +}; + +template +struct reshaped_evaluator + : evaluator_base > { + typedef Reshaped XprType; + + enum { + CoeffReadCost = evaluator::CoeffReadCost /* TODO + cost of index computations */, + + Flags = (evaluator::Flags & (HereditaryBits /*| LinearAccessBit | DirectAccessBit*/)), + + Alignment = 0 + }; + + EIGEN_DEVICE_FUNC explicit reshaped_evaluator(const XprType& xpr) : m_argImpl(xpr.nestedExpression()), m_xpr(xpr) { + EIGEN_INTERNAL_CHECK_COST_VALUE(CoeffReadCost); + } + + typedef typename XprType::Scalar Scalar; + typedef typename XprType::CoeffReturnType CoeffReturnType; + + typedef std::pair RowCol; + + EIGEN_DEVICE_FUNC inline RowCol index_remap(Index rowId, Index colId) const { + if (Order == ColMajor) { + const Index nth_elem_idx = colId * m_xpr.rows() + rowId; + return RowCol(nth_elem_idx % m_xpr.nestedExpression().rows(), nth_elem_idx / m_xpr.nestedExpression().rows()); + } else { + const Index nth_elem_idx = colId + rowId * m_xpr.cols(); + return RowCol(nth_elem_idx / m_xpr.nestedExpression().cols(), nth_elem_idx % m_xpr.nestedExpression().cols()); + } + } + + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index rowId, Index colId) { + EIGEN_STATIC_ASSERT_LVALUE(XprType) + const RowCol row_col = index_remap(rowId, colId); + return m_argImpl.coeffRef(row_col.first, row_col.second); + } + + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index rowId, Index colId) const { + const RowCol row_col = index_remap(rowId, colId); + return m_argImpl.coeffRef(row_col.first, row_col.second); + } + + EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE const CoeffReturnType coeff(Index rowId, Index colId) const { + const RowCol row_col = index_remap(rowId, colId); + return m_argImpl.coeff(row_col.first, row_col.second); + } + + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index index) { + EIGEN_STATIC_ASSERT_LVALUE(XprType) + const RowCol row_col = index_remap(Rows == 1 ? 0 : index, Rows == 1 ? index : 0); + return m_argImpl.coeffRef(row_col.first, row_col.second); + } + + EIGEN_DEVICE_FUNC inline const Scalar& coeffRef(Index index) const { + const RowCol row_col = index_remap(Rows == 1 ? 0 : index, Rows == 1 ? index : 0); + return m_argImpl.coeffRef(row_col.first, row_col.second); + } + + EIGEN_DEVICE_FUNC inline const CoeffReturnType coeff(Index index) const { + const RowCol row_col = index_remap(Rows == 1 ? 0 : index, Rows == 1 ? index : 0); + return m_argImpl.coeff(row_col.first, row_col.second); + } +#if 0 + EIGEN_DEVICE_FUNC + template + inline PacketScalar packet(Index rowId, Index colId) const + { + const RowCol row_col = index_remap(rowId, colId); + return m_argImpl.template packet(row_col.first, row_col.second); + + } + + template + EIGEN_DEVICE_FUNC + inline void writePacket(Index rowId, Index colId, const PacketScalar& val) + { + const RowCol row_col = index_remap(rowId, colId); + m_argImpl.const_cast_derived().template writePacket + (row_col.first, row_col.second, val); + } + + template + EIGEN_DEVICE_FUNC + inline PacketScalar packet(Index index) const + { + const RowCol row_col = index_remap(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? index : 0); + return m_argImpl.template packet(row_col.first, row_col.second); + } + + template + EIGEN_DEVICE_FUNC + inline void writePacket(Index index, const PacketScalar& val) + { + const RowCol row_col = index_remap(RowsAtCompileTime == 1 ? 0 : index, + RowsAtCompileTime == 1 ? 
index : 0); + return m_argImpl.template packet(row_col.first, row_col.second, val); + } +#endif + protected: + evaluator m_argImpl; + const XprType& m_xpr; +}; + +template +struct reshaped_evaluator + : mapbase_evaluator, + typename Reshaped::PlainObject> { + typedef Reshaped XprType; + typedef typename XprType::Scalar Scalar; + + EIGEN_DEVICE_FUNC explicit reshaped_evaluator(const XprType& xpr) + : mapbase_evaluator(xpr) { + // TODO: for the 3.4 release, this should be turned to an internal assertion, but let's keep it as is for the beta + // lifetime + eigen_assert(((std::uintptr_t(xpr.data()) % plain_enum_max(1, evaluator::Alignment)) == 0) && + "data is not aligned"); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_RESHAPED_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/ReturnByValue.h b/o-voxel/third_party/eigen/Eigen/src/Core/ReturnByValue.h new file mode 100644 index 0000000000000000000000000000000000000000..d751f20465746a686a7eed64c6715d9962aae97f --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/ReturnByValue.h @@ -0,0 +1,111 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// Copyright (C) 2009-2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_RETURNBYVALUE_H +#define EIGEN_RETURNBYVALUE_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template +struct traits > : public traits::ReturnType> { + enum { + // We're disabling the DirectAccess because e.g. the constructor of + // the Block-with-DirectAccess expression requires to have a coeffRef method. + // Also, we don't want to have to implement the stride stuff. 
+ Flags = (traits::ReturnType>::Flags | EvalBeforeNestingBit) & ~DirectAccessBit + }; +}; + +/* The ReturnByValue object doesn't even have a coeff() method. + * So the only way that nesting it in an expression can work, is by evaluating it into a plain matrix. + * So internal::nested always gives the plain return matrix type. + * + * FIXME: I don't understand why we need this specialization: isn't this taken care of by the EvalBeforeNestingBit ?? + * Answer: EvalBeforeNestingBit should be deprecated since we have the evaluators + */ +template +struct nested_eval, n, PlainObject> { + typedef typename traits::ReturnType type; +}; + +} // end namespace internal + +/** \class ReturnByValue + * \ingroup Core_Module + * + */ +template +class ReturnByValue : public internal::dense_xpr_base >::type, internal::no_assignment_operator { + public: + typedef typename internal::traits::ReturnType ReturnType; + + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(ReturnByValue) + + template + EIGEN_DEVICE_FUNC inline void evalTo(Dest& dst) const { + static_cast(this)->evalTo(dst); + } + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return static_cast(this)->rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return static_cast(this)->cols(); } + +#ifndef EIGEN_PARSED_BY_DOXYGEN +#define Unusable \ + YOU_ARE_TRYING_TO_ACCESS_A_SINGLE_COEFFICIENT_IN_A_SPECIAL_EXPRESSION_WHERE_THAT_IS_NOT_ALLOWED_BECAUSE_THAT_WOULD_BE_INEFFICIENT + class Unusable { + Unusable(const Unusable&) {} + Unusable& operator=(const Unusable&) { return *this; } + }; + const Unusable& coeff(Index) const { return *reinterpret_cast(this); } + const Unusable& coeff(Index, Index) const { return *reinterpret_cast(this); } + Unusable& coeffRef(Index) { return *reinterpret_cast(this); } + Unusable& coeffRef(Index, Index) { return *reinterpret_cast(this); } +#undef Unusable +#endif +}; + +template +template +EIGEN_DEVICE_FUNC Derived& 
DenseBase::operator=(const ReturnByValue& other) { + other.evalTo(derived()); + return derived(); +} + +namespace internal { + +// Expression is evaluated in a temporary; default implementation of Assignment is bypassed so that +// when a ReturnByValue expression is assigned, the evaluator is not constructed. +// TODO: Finalize port to new regime; ReturnByValue should not exist in the expression world + +template +struct evaluator > : public evaluator::ReturnType> { + typedef ReturnByValue XprType; + typedef typename internal::traits::ReturnType PlainObject; + typedef evaluator Base; + + EIGEN_DEVICE_FUNC explicit evaluator(const XprType& xpr) : m_result(xpr.rows(), xpr.cols()) { + internal::construct_at(this, m_result); + xpr.evalTo(m_result); + } + + protected: + PlainObject m_result; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_RETURNBYVALUE_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Reverse.h b/o-voxel/third_party/eigen/Eigen/src/Core/Reverse.h new file mode 100644 index 0000000000000000000000000000000000000000..64a436e2e3d4da6755e42b323b4cc07b990dc1b5 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Reverse.h @@ -0,0 +1,202 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2009 Ricard Marxer +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_REVERSE_H +#define EIGEN_REVERSE_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template +struct traits > : traits { + typedef typename MatrixType::Scalar Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::XprKind XprKind; + typedef typename ref_selector::type MatrixTypeNested; + typedef std::remove_reference_t MatrixTypeNested_; + enum { + RowsAtCompileTime = MatrixType::RowsAtCompileTime, + ColsAtCompileTime = MatrixType::ColsAtCompileTime, + MaxRowsAtCompileTime = MatrixType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxColsAtCompileTime, + Flags = MatrixTypeNested_::Flags & (RowMajorBit | LvalueBit) + }; +}; + +template +struct reverse_packet_cond { + static inline PacketType run(const PacketType& x) { return preverse(x); } +}; + +template +struct reverse_packet_cond { + static inline PacketType run(const PacketType& x) { return x; } +}; + +} // end namespace internal + +/** \class Reverse + * \ingroup Core_Module + * + * \brief Expression of the reverse of a vector or matrix + * + * \tparam MatrixType the type of the object of which we are taking the reverse + * \tparam Direction defines the direction of the reverse operation, can be Vertical, Horizontal, or BothDirections + * + * This class represents an expression of the reverse of a vector. + * It is the return type of MatrixBase::reverse() and VectorwiseOp::reverse() + * and most of the time this is the only way it is used. 
+ * + * \sa MatrixBase::reverse(), VectorwiseOp::reverse() + */ +template +class Reverse : public internal::dense_xpr_base >::type { + public: + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Reverse) + typedef internal::remove_all_t NestedExpression; + using Base::IsRowMajor; + + protected: + enum { + PacketSize = internal::packet_traits::size, + IsColMajor = !IsRowMajor, + ReverseRow = (Direction == Vertical) || (Direction == BothDirections), + ReverseCol = (Direction == Horizontal) || (Direction == BothDirections), + OffsetRow = ReverseRow && IsColMajor ? PacketSize : 1, + OffsetCol = ReverseCol && IsRowMajor ? PacketSize : 1, + ReversePacket = (Direction == BothDirections) || ((Direction == Vertical) && IsColMajor) || + ((Direction == Horizontal) && IsRowMajor) + }; + typedef internal::reverse_packet_cond reverse_packet; + + public: + EIGEN_DEVICE_FUNC explicit inline Reverse(const MatrixType& matrix) : m_matrix(matrix) {} + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Reverse) + + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_matrix.cols(); } + + EIGEN_DEVICE_FUNC inline Index innerStride() const { return -m_matrix.innerStride(); } + + EIGEN_DEVICE_FUNC const internal::remove_all_t& nestedExpression() const { + return m_matrix; + } + + protected: + typename MatrixType::Nested m_matrix; +}; + +/** \returns an expression of the reverse of *this. + * + * Example: \include MatrixBase_reverse.cpp + * Output: \verbinclude MatrixBase_reverse.out + * + */ +template +EIGEN_DEVICE_FUNC inline typename DenseBase::ReverseReturnType DenseBase::reverse() { + return ReverseReturnType(derived()); +} + +// reverse const overload moved to DenseBase.h due to a CUDA compiler bug + +/** This is the "in place" version of reverse: it reverses \c *this. + * + * In most cases it is probably better to simply use the reversed expression + * of a matrix. 
However, when reversing the matrix data itself is really needed, + * then this "in-place" version is probably the right choice because it provides + * the following additional benefits: + * - less error prone: doing the same operation with .reverse() requires special care: + * \code m = m.reverse().eval(); \endcode + * - this API enables reverse operations without the need for a temporary + * - it allows future optimizations (cache friendliness, etc.) + * + * \sa VectorwiseOp::reverseInPlace(), reverse() */ +template +EIGEN_DEVICE_FUNC inline void DenseBase::reverseInPlace() { + constexpr int HalfRowsAtCompileTime = RowsAtCompileTime == Dynamic ? Dynamic : RowsAtCompileTime / 2; + constexpr int HalfColsAtCompileTime = ColsAtCompileTime == Dynamic ? Dynamic : ColsAtCompileTime / 2; + if (cols() > rows()) { + Index half = cols() / 2; + this->template leftCols(half).swap( + this->template rightCols(half).reverse()); + if ((cols() % 2) == 1) { + Index half2 = rows() / 2; + col(half).template head(half2).swap( + col(half).template tail(half2).reverse()); + } + } else { + Index half = rows() / 2; + this->template topRows(half).swap( + this->template bottomRows(half).reverse()); + if ((rows() % 2) == 1) { + Index half2 = cols() / 2; + row(half).template head(half2).swap( + row(half).template tail(half2).reverse()); + } + } +} + +namespace internal { + +template +struct vectorwise_reverse_inplace_impl; + +template <> +struct vectorwise_reverse_inplace_impl { + template + static void run(ExpressionType& xpr) { + constexpr Index HalfAtCompileTime = + ExpressionType::RowsAtCompileTime == Dynamic ? Dynamic : ExpressionType::RowsAtCompileTime / 2; + Index half = xpr.rows() / 2; + xpr.template topRows(half).swap( + xpr.template bottomRows(half).colwise().reverse()); + } +}; + +template <> +struct vectorwise_reverse_inplace_impl { + template + static void run(ExpressionType& xpr) { + constexpr Index HalfAtCompileTime = + ExpressionType::ColsAtCompileTime == Dynamic ? 
Dynamic : ExpressionType::ColsAtCompileTime / 2; + Index half = xpr.cols() / 2; + xpr.template leftCols(half).swap( + xpr.template rightCols(half).rowwise().reverse()); + } +}; + +} // end namespace internal + +/** This is the "in place" version of VectorwiseOp::reverse: it reverses each column or row of \c *this. + * + * In most cases it is probably better to simply use the reversed expression + * of a matrix. However, when reversing the matrix data itself is really needed, + * then this "in-place" version is probably the right choice because it provides + * the following additional benefits: + * - less error prone: doing the same operation with .reverse() requires special care: + * \code m = m.reverse().eval(); \endcode + * - this API enables reverse operations without the need for a temporary + * + * \sa DenseBase::reverseInPlace(), reverse() */ +template +EIGEN_DEVICE_FUNC void VectorwiseOp::reverseInPlace() { + internal::vectorwise_reverse_inplace_impl::run(m_matrix); +} + +} // end namespace Eigen + +#endif // EIGEN_REVERSE_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Select.h b/o-voxel/third_party/eigen/Eigen/src/Core/Select.h new file mode 100644 index 0000000000000000000000000000000000000000..7584ad419503fe898842205d15ec4687a98d356f --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Select.h @@ -0,0 +1,92 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_SELECT_H +#define EIGEN_SELECT_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +/** \typedef Select + * \ingroup Core_Module + * + * \brief Expression of a coefficient wise version of the C++ ternary operator ?: + * + * \tparam ConditionMatrixType the type of the \em condition expression which must be a boolean matrix + * \tparam ThenMatrixType the type of the \em then expression + * \tparam ElseMatrixType the type of the \em else expression + * + * This type represents an expression of a coefficient wise version of the C++ ternary operator ?:. + * It is the return type of DenseBase::select() and most of the time this is the only way it is used. + * + * \sa DenseBase::select(const DenseBase&, const DenseBase&) const + */ +template +using Select = CwiseTernaryOp::Scalar, + typename DenseBase::Scalar, + typename DenseBase::Scalar>, + ThenMatrixType, ElseMatrixType, ConditionMatrixType>; + +/** \returns a matrix where each coefficient (i,j) is equal to \a thenMatrix(i,j) + * if \c *this(i,j) != Scalar(0), and \a elseMatrix(i,j) otherwise. + * + * Example: \include MatrixBase_select.cpp + * Output: \verbinclude MatrixBase_select.out + * + * \sa typedef Select + */ +template +template +inline EIGEN_DEVICE_FUNC CwiseTernaryOp< + internal::scalar_boolean_select_op::Scalar, typename DenseBase::Scalar, + typename DenseBase::Scalar>, + ThenDerived, ElseDerived, Derived> +DenseBase::select(const DenseBase& thenMatrix, const DenseBase& elseMatrix) const { + return Select(thenMatrix.derived(), elseMatrix.derived(), derived()); +} +/** Version of DenseBase::select(const DenseBase&, const DenseBase&) with + * the \em else expression being a scalar value. 
+ * + * \sa typedef Select + */ +template +template +inline EIGEN_DEVICE_FUNC CwiseTernaryOp< + internal::scalar_boolean_select_op::Scalar, typename DenseBase::Scalar, + typename DenseBase::Scalar>, + ThenDerived, typename DenseBase::ConstantReturnType, Derived> +DenseBase::select(const DenseBase& thenMatrix, + const typename DenseBase::Scalar& elseScalar) const { + using ElseConstantType = typename DenseBase::ConstantReturnType; + return Select(thenMatrix.derived(), + ElseConstantType(rows(), cols(), elseScalar), derived()); +} +/** Version of DenseBase::select(const DenseBase&, const DenseBase&) with + * the \em then expression being a scalar value. + * + * \sa typedef Select + */ +template +template +inline EIGEN_DEVICE_FUNC CwiseTernaryOp< + internal::scalar_boolean_select_op::Scalar, typename DenseBase::Scalar, + typename DenseBase::Scalar>, + typename DenseBase::ConstantReturnType, ElseDerived, Derived> +DenseBase::select(const typename DenseBase::Scalar& thenScalar, + const DenseBase& elseMatrix) const { + using ThenConstantType = typename DenseBase::ConstantReturnType; + return Select(ThenConstantType(rows(), cols(), thenScalar), + elseMatrix.derived(), derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_SELECT_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/SelfAdjointView.h b/o-voxel/third_party/eigen/Eigen/src/Core/SelfAdjointView.h new file mode 100644 index 0000000000000000000000000000000000000000..e88708d547e95ec278134717a860a4819cb3cd5e --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/SelfAdjointView.h @@ -0,0 +1,329 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_SELFADJOINTMATRIX_H +#define EIGEN_SELFADJOINTMATRIX_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +/** \class SelfAdjointView + * \ingroup Core_Module + * + * + * \brief Expression of a selfadjoint matrix from a triangular part of a dense matrix + * + * \tparam MatrixType the type of the dense matrix storing the coefficients + * \tparam TriangularPart can be either \c #Lower or \c #Upper + * + * This class is an expression of a selfadjoint matrix from a triangular part of a matrix + * with given dense storage of the coefficients. It is the return type of MatrixBase::selfadjointView() + * and most of the time this is the only way that it is used. + * + * \sa class TriangularBase, MatrixBase::selfadjointView() + */ + +namespace internal { +template +struct traits > : traits { + typedef typename ref_selector::non_const_type MatrixTypeNested; + typedef remove_all_t MatrixTypeNestedCleaned; + typedef MatrixType ExpressionType; + typedef typename MatrixType::PlainObject FullMatrixType; + enum { + Mode = UpLo | SelfAdjoint, + FlagsLvalueBit = is_lvalue::value ? 
LvalueBit : 0, + Flags = MatrixTypeNestedCleaned::Flags & (HereditaryBits | FlagsLvalueBit) & + (~(PacketAccessBit | DirectAccessBit | LinearAccessBit)) // FIXME these flags should be preserved + }; +}; +} // namespace internal + +template +class SelfAdjointView : public TriangularBase > { + public: + EIGEN_STATIC_ASSERT(UpLo == Lower || UpLo == Upper, SELFADJOINTVIEW_ACCEPTS_UPPER_AND_LOWER_MODE_ONLY) + + typedef MatrixType_ MatrixType; + typedef TriangularBase Base; + typedef typename internal::traits::MatrixTypeNested MatrixTypeNested; + typedef typename internal::traits::MatrixTypeNestedCleaned MatrixTypeNestedCleaned; + typedef MatrixTypeNestedCleaned NestedExpression; + + /** \brief The type of coefficients in this matrix */ + typedef typename internal::traits::Scalar Scalar; + typedef typename MatrixType::StorageIndex StorageIndex; + typedef internal::remove_all_t MatrixConjugateReturnType; + typedef SelfAdjointView, UpLo> ConstSelfAdjointView; + + enum { + Mode = internal::traits::Mode, + Flags = internal::traits::Flags, + TransposeMode = ((int(Mode) & int(Upper)) ? Lower : 0) | ((int(Mode) & int(Lower)) ? 
Upper : 0) + }; + typedef typename MatrixType::PlainObject PlainObject; + + EIGEN_DEVICE_FUNC explicit inline SelfAdjointView(MatrixType& matrix) : m_matrix(matrix) {} + + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_matrix.rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_matrix.cols(); } + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return m_matrix.outerStride(); } + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return m_matrix.innerStride(); } + + /** \sa MatrixBase::coeff() + * \warning the coordinates must fit into the referenced triangular part + */ + EIGEN_DEVICE_FUNC inline Scalar coeff(Index row, Index col) const { + Base::check_coordinates_internal(row, col); + return m_matrix.coeff(row, col); + } + + /** \sa MatrixBase::coeffRef() + * \warning the coordinates must fit into the referenced triangular part + */ + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { + EIGEN_STATIC_ASSERT_LVALUE(SelfAdjointView); + Base::check_coordinates_internal(row, col); + return m_matrix.coeffRef(row, col); + } + + /** \internal */ + EIGEN_DEVICE_FUNC const MatrixTypeNestedCleaned& _expression() const { return m_matrix; } + + EIGEN_DEVICE_FUNC const MatrixTypeNestedCleaned& nestedExpression() const { return m_matrix; } + EIGEN_DEVICE_FUNC MatrixTypeNestedCleaned& nestedExpression() { return m_matrix; } + + /** Efficient triangular matrix times vector/matrix product */ + template + EIGEN_DEVICE_FUNC const Product operator*(const MatrixBase& rhs) const { + return Product(*this, rhs.derived()); + } + + /** Efficient vector/matrix times triangular matrix product */ + template + friend EIGEN_DEVICE_FUNC const Product operator*(const MatrixBase& lhs, + const SelfAdjointView& rhs) { + return Product(lhs.derived(), rhs); + } + + friend EIGEN_DEVICE_FUNC const + SelfAdjointView + operator*(const Scalar& s, const SelfAdjointView& mat) { + return (s * 
mat.nestedExpression()).template selfadjointView(); + } + + /** Perform a symmetric rank 2 update of the selfadjoint matrix \c *this: + * \f$ this = this + \alpha u v^* + conj(\alpha) v u^* \f$ + * \returns a reference to \c *this + * + * The vectors \a u and \c v \b must be column vectors, however they can be + * an adjoint expression without any overhead. Only the meaningful triangular + * part of the matrix is updated, the rest is left unchanged. + * + * \sa rankUpdate(const MatrixBase&, Scalar) + */ + template + EIGEN_DEVICE_FUNC SelfAdjointView& rankUpdate(const MatrixBase& u, const MatrixBase& v, + const Scalar& alpha = Scalar(1)); + + /** Perform a symmetric rank K update of the selfadjoint matrix \c *this: + * \f$ this = this + \alpha ( u u^* ) \f$ where \a u is a vector or matrix. + * + * \returns a reference to \c *this + * + * Note that to perform \f$ this = this + \alpha ( u^* u ) \f$ you can simply + * call this function with u.adjoint(). + * + * \sa rankUpdate(const MatrixBase&, const MatrixBase&, Scalar) + */ + template + EIGEN_DEVICE_FUNC SelfAdjointView& rankUpdate(const MatrixBase& u, const Scalar& alpha = Scalar(1)); + + /** \returns an expression of a triangular view extracted from the current selfadjoint view of a given triangular part + * + * The parameter \a TriMode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper, + * \c #Lower, \c #StrictlyLower, \c #UnitLower. + * + * If \c TriMode references the same triangular part as \c *this, then this method simply returns a \c TriangularView + * of the nested expression, otherwise, the nested expression is first transposed, thus returning a \c + * TriangularView> object. 
+ * + * \sa MatrixBase::triangularView(), class TriangularView + */ + template + EIGEN_DEVICE_FUNC + std::conditional_t<(TriMode & (Upper | Lower)) == (UpLo & (Upper | Lower)), TriangularView, + TriangularView > + triangularView() const { + std::conditional_t<(TriMode & (Upper | Lower)) == (UpLo & (Upper | Lower)), MatrixType&, + typename MatrixType::ConstTransposeReturnType> + tmp1(m_matrix); + std::conditional_t<(TriMode & (Upper | Lower)) == (UpLo & (Upper | Lower)), MatrixType&, + typename MatrixType::AdjointReturnType> + tmp2(tmp1); + return std::conditional_t<(TriMode & (Upper | Lower)) == (UpLo & (Upper | Lower)), + TriangularView, + TriangularView >(tmp2); + } + + typedef SelfAdjointView ConjugateReturnType; + /** \sa MatrixBase::conjugate() const */ + EIGEN_DEVICE_FUNC inline const ConjugateReturnType conjugate() const { + return ConjugateReturnType(m_matrix.conjugate()); + } + + /** \returns an expression of the complex conjugate of \c *this if Cond==true, + * returns \c *this otherwise. 
+ */ + template + EIGEN_DEVICE_FUNC inline std::conditional_t conjugateIf() const { + typedef std::conditional_t ReturnType; + return ReturnType(m_matrix.template conjugateIf()); + } + + typedef SelfAdjointView AdjointReturnType; + /** \sa MatrixBase::adjoint() const */ + EIGEN_DEVICE_FUNC inline const AdjointReturnType adjoint() const { return AdjointReturnType(m_matrix.adjoint()); } + + typedef SelfAdjointView TransposeReturnType; + /** \sa MatrixBase::transpose() */ + template + EIGEN_DEVICE_FUNC inline TransposeReturnType transpose( + std::enable_if_t::value, Dummy*> = nullptr) { + typename MatrixType::TransposeReturnType tmp(m_matrix); + return TransposeReturnType(tmp); + } + + typedef SelfAdjointView ConstTransposeReturnType; + /** \sa MatrixBase::transpose() const */ + EIGEN_DEVICE_FUNC inline const ConstTransposeReturnType transpose() const { + return ConstTransposeReturnType(m_matrix.transpose()); + } + + /** \returns a const expression of the main diagonal of the matrix \c *this + * + * This method simply returns the diagonal of the nested expression, thus by-passing the SelfAdjointView decorator. 
+ * + * \sa MatrixBase::diagonal(), class Diagonal */ + EIGEN_DEVICE_FUNC typename MatrixType::ConstDiagonalReturnType diagonal() const { + return typename MatrixType::ConstDiagonalReturnType(m_matrix); + } + + /////////// Cholesky module /////////// + + const LLT llt() const; + const LDLT ldlt() const; + + /////////// Eigenvalue module /////////// + + /** Real part of #Scalar */ + typedef typename NumTraits::Real RealScalar; + /** Return type of eigenvalues() */ + typedef Matrix::ColsAtCompileTime, 1> EigenvaluesReturnType; + + EIGEN_DEVICE_FUNC EigenvaluesReturnType eigenvalues() const; + EIGEN_DEVICE_FUNC RealScalar operatorNorm() const; + + protected: + MatrixTypeNested m_matrix; +}; + +// template +// internal::selfadjoint_matrix_product_returntype > +// operator*(const MatrixBase& lhs, const SelfAdjointView& rhs) +// { +// return internal::matrix_selfadjoint_product_returntype +// >(lhs.derived(),rhs); +// } + +// selfadjoint to dense matrix + +namespace internal { + +// TODO currently a selfadjoint expression has the form SelfAdjointView<.,.> +// in the future selfadjoint-ness should be defined by the expression traits +// such that Transpose > is valid. 
(currently TriangularBase::transpose() is overloaded to +// make it work) +template +struct evaluator_traits > { + typedef typename storage_kind_to_evaluator_kind::Kind Kind; + typedef SelfAdjointShape Shape; +}; + +template +class triangular_dense_assignment_kernel + : public generic_dense_assignment_kernel { + protected: + typedef generic_dense_assignment_kernel Base; + typedef typename Base::DstXprType DstXprType; + typedef typename Base::SrcXprType SrcXprType; + using Base::m_dst; + using Base::m_functor; + using Base::m_src; + + public: + typedef typename Base::DstEvaluatorType DstEvaluatorType; + typedef typename Base::SrcEvaluatorType SrcEvaluatorType; + typedef typename Base::Scalar Scalar; + typedef typename Base::AssignmentTraits AssignmentTraits; + + EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType& dst, const SrcEvaluatorType& src, + const Functor& func, DstXprType& dstExpr) + : Base(dst, src, func, dstExpr) {} + + EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col) { + eigen_internal_assert(row != col); + Scalar tmp = m_src.coeff(row, col); + m_functor.assignCoeff(m_dst.coeffRef(row, col), tmp); + m_functor.assignCoeff(m_dst.coeffRef(col, row), numext::conj(tmp)); + } + + EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id) { Base::assignCoeff(id, id); } + + EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index, Index) { eigen_internal_assert(false && "should never be called"); } +}; + +} // end namespace internal + +/*************************************************************************** + * Implementation of MatrixBase methods + ***************************************************************************/ + +/** This is the const version of MatrixBase::selfadjointView() */ +template +template +EIGEN_DEVICE_FUNC typename MatrixBase::template ConstSelfAdjointViewReturnType::Type +MatrixBase::selfadjointView() const { + return typename ConstSelfAdjointViewReturnType::Type(derived()); +} + +/** \returns an expression of a 
symmetric/self-adjoint view extracted from the upper or lower triangular part of the + * current matrix + * + * The parameter \a UpLo can be either \c #Upper or \c #Lower + * + * Example: \include MatrixBase_selfadjointView.cpp + * Output: \verbinclude MatrixBase_selfadjointView.out + * + * \sa class SelfAdjointView + */ +template +template +EIGEN_DEVICE_FUNC typename MatrixBase::template SelfAdjointViewReturnType::Type +MatrixBase::selfadjointView() { + return typename SelfAdjointViewReturnType::Type(derived()); +} + +} // end namespace Eigen + +#endif // EIGEN_SELFADJOINTMATRIX_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h b/o-voxel/third_party/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h new file mode 100644 index 0000000000000000000000000000000000000000..5d504936d4420676e46b9d04e07d2bb30d4707d1 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/SelfCwiseBinaryOp.h @@ -0,0 +1,50 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009-2010 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_SELFCWISEBINARYOP_H +#define EIGEN_SELFCWISEBINARYOP_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(const Scalar& other) { + using ConstantExpr = typename internal::plain_constant_type::type; + using Op = internal::mul_assign_op; + internal::call_assignment(derived(), ConstantExpr(rows(), cols(), other), Op()); + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator*=(const RealScalar& other) { + realView() *= other; + return derived(); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator/=(const Scalar& other) { + using ConstantExpr = typename internal::plain_constant_type::type; + using Op = internal::div_assign_op; + internal::call_assignment(derived(), ConstantExpr(rows(), cols(), other), Op()); + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Derived& DenseBase::operator/=(const RealScalar& other) { + realView() /= other; + return derived(); +} + +} // end namespace Eigen + +#endif // EIGEN_SELFCWISEBINARYOP_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h b/o-voxel/third_party/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h new file mode 100644 index 0000000000000000000000000000000000000000..157ef70bc013b05bc70db54e5125edcc846c414a --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/SkewSymmetricMatrix3.h @@ -0,0 +1,382 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// Copyright (C) 2007-2009 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_SKEWSYMMETRICMATRIX3_H +#define EIGEN_SKEWSYMMETRICMATRIX3_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +/** \class SkewSymmetricBase + * \ingroup Core_Module + * + * \brief Base class for skew symmetric matrices and expressions + * + * This is the base class that is inherited by SkewSymmetricMatrix3 and related expression + * types, which internally use a three vector for storing the entries. SkewSymmetric + * types always represent square three times three matrices. + * + * This implementation follows class DiagonalMatrix + * + * \tparam Derived is the derived type, a SkewSymmetricMatrix3 or SkewSymmetricWrapper. + * + * \sa class SkewSymmetricMatrix3, class SkewSymmetricWrapper + */ +template +class SkewSymmetricBase : public EigenBase { + public: + typedef typename internal::traits::SkewSymmetricVectorType SkewSymmetricVectorType; + typedef typename SkewSymmetricVectorType::Scalar Scalar; + typedef typename SkewSymmetricVectorType::RealScalar RealScalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; + + enum { + RowsAtCompileTime = SkewSymmetricVectorType::SizeAtCompileTime, + ColsAtCompileTime = SkewSymmetricVectorType::SizeAtCompileTime, + MaxRowsAtCompileTime = SkewSymmetricVectorType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = SkewSymmetricVectorType::MaxSizeAtCompileTime, + IsVectorAtCompileTime = 0, + Flags = NoPreferredStorageOrderBit + }; + + typedef Matrix + DenseMatrixType; + typedef DenseMatrixType DenseType; + typedef SkewSymmetricMatrix3 PlainObject; + + /** \returns a const reference to the derived object. */ + EIGEN_DEVICE_FUNC inline const Derived& derived() const { return *static_cast(this); } + /** \returns a reference to the derived object. */ + EIGEN_DEVICE_FUNC inline Derived& derived() { return *static_cast(this); } + + /** + * Constructs a dense matrix from \c *this. 
Note, this directly returns a dense matrix type, + * not an expression. + * \returns A dense matrix, with its entries set from the derived object. */ + EIGEN_DEVICE_FUNC DenseMatrixType toDenseMatrix() const { return derived(); } + + /** Determinant vanishes */ + EIGEN_DEVICE_FUNC constexpr Scalar determinant() const { return 0; } + + /** A.transpose() = -A */ + EIGEN_DEVICE_FUNC PlainObject transpose() const { return (-vector()).asSkewSymmetric(); } + + /** \returns the exponential of this matrix using Rodrigues’ formula */ + EIGEN_DEVICE_FUNC DenseMatrixType exponential() const { + DenseMatrixType retVal = DenseMatrixType::Identity(); + const SkewSymmetricVectorType& v = vector(); + if (v.isZero()) { + return retVal; + } + const Scalar norm2 = v.squaredNorm(); + const Scalar norm = numext::sqrt(norm2); + retVal += ((((1 - numext::cos(norm)) / norm2) * derived()) * derived()) + + (numext::sin(norm) / norm) * derived().toDenseMatrix(); + return retVal; + } + + /** \returns a const reference to the derived object's vector of coefficients. */ + EIGEN_DEVICE_FUNC inline const SkewSymmetricVectorType& vector() const { return derived().vector(); } + /** \returns a reference to the derived object's vector of coefficients. */ + EIGEN_DEVICE_FUNC inline SkewSymmetricVectorType& vector() { return derived().vector(); } + + /** \returns the number of rows. */ + EIGEN_DEVICE_FUNC constexpr Index rows() const { return 3; } + /** \returns the number of columns. 
*/ + EIGEN_DEVICE_FUNC constexpr Index cols() const { return 3; } + + /** \returns the matrix product of \c *this by the dense matrix, \a matrix */ + template + EIGEN_DEVICE_FUNC Product operator*( + const MatrixBase& matrix) const { + return Product(derived(), matrix.derived()); + } + + /** \returns the matrix product of \c *this by the skew symmetric matrix, \a matrix */ + template + EIGEN_DEVICE_FUNC Product operator*( + const SkewSymmetricBase& matrix) const { + return Product(derived(), matrix.derived()); + } + + template + using SkewSymmetricProductReturnType = SkewSymmetricWrapper; + + /** \returns the wedge product of \c *this by the skew symmetric matrix \a other + * A wedge B = AB - BA */ + template + EIGEN_DEVICE_FUNC SkewSymmetricProductReturnType wedge( + const SkewSymmetricBase& other) const { + return vector().cross(other.vector()).asSkewSymmetric(); + } + + using SkewSymmetricScaleReturnType = + SkewSymmetricWrapper; + + /** \returns the product of \c *this by the scalar \a scalar */ + EIGEN_DEVICE_FUNC inline SkewSymmetricScaleReturnType operator*(const Scalar& scalar) const { + return (vector() * scalar).asSkewSymmetric(); + } + + using ScaleSkewSymmetricReturnType = + SkewSymmetricWrapper; + + /** \returns the product of a scalar and the skew symmetric matrix \a other */ + EIGEN_DEVICE_FUNC friend inline ScaleSkewSymmetricReturnType operator*(const Scalar& scalar, + const SkewSymmetricBase& other) { + return (scalar * other.vector()).asSkewSymmetric(); + } + + template + using SkewSymmetricSumReturnType = SkewSymmetricWrapper; + + /** \returns the sum of \c *this and the skew symmetric matrix \a other */ + template + EIGEN_DEVICE_FUNC inline SkewSymmetricSumReturnType operator+( + const SkewSymmetricBase& other) const { + return (vector() + other.vector()).asSkewSymmetric(); + } + + template + using SkewSymmetricDifferenceReturnType = SkewSymmetricWrapper; + + /** \returns the difference of \c *this and the skew symmetric matrix \a other */ + 
template + EIGEN_DEVICE_FUNC inline SkewSymmetricDifferenceReturnType operator-( + const SkewSymmetricBase& other) const { + return (vector() - other.vector()).asSkewSymmetric(); + } +}; + +/** \class SkewSymmetricMatrix3 + * \ingroup Core_Module + * + * \brief Represents a 3x3 skew symmetric matrix with its storage + * + * \tparam Scalar_ the type of coefficients + * + * \sa class SkewSymmetricBase, class SkewSymmetricWrapper + */ + +namespace internal { +template +struct traits> : traits> { + typedef Matrix SkewSymmetricVectorType; + typedef SkewSymmetricShape StorageKind; + enum { Flags = LvalueBit | NoPreferredStorageOrderBit | NestByRefBit }; +}; +} // namespace internal +template +class SkewSymmetricMatrix3 : public SkewSymmetricBase> { + public: +#ifndef EIGEN_PARSED_BY_DOXYGEN + typedef typename internal::traits::SkewSymmetricVectorType SkewSymmetricVectorType; + typedef const SkewSymmetricMatrix3& Nested; + typedef Scalar_ Scalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; +#endif + + protected: + SkewSymmetricVectorType m_vector; + + public: + /** const version of vector(). */ + EIGEN_DEVICE_FUNC inline const SkewSymmetricVectorType& vector() const { return m_vector; } + /** \returns a reference to the stored vector of coefficients. 
*/ + EIGEN_DEVICE_FUNC inline SkewSymmetricVectorType& vector() { return m_vector; } + + /** Default constructor without initialization */ + EIGEN_DEVICE_FUNC inline SkewSymmetricMatrix3() {} + + /** Constructor from three scalars */ + EIGEN_DEVICE_FUNC inline SkewSymmetricMatrix3(const Scalar& x, const Scalar& y, const Scalar& z) + : m_vector(x, y, z) {} + + /** \brief Constructs a SkewSymmetricMatrix3 from an r-value vector type */ + EIGEN_DEVICE_FUNC explicit inline SkewSymmetricMatrix3(SkewSymmetricVectorType&& vec) : m_vector(std::move(vec)) {} + + /** generic constructor from expression of the coefficients */ + template + EIGEN_DEVICE_FUNC explicit inline SkewSymmetricMatrix3(const MatrixBase& other) : m_vector(other) {} + + /** Copy constructor. */ + template + EIGEN_DEVICE_FUNC inline SkewSymmetricMatrix3(const SkewSymmetricBase& other) + : m_vector(other.vector()) {} + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** copy constructor. prevent a default copy constructor from hiding the other templated constructor */ + inline SkewSymmetricMatrix3(const SkewSymmetricMatrix3& other) : m_vector(other.vector()) {} +#endif + + /** Copy operator. */ + template + EIGEN_DEVICE_FUNC SkewSymmetricMatrix3& operator=(const SkewSymmetricBase& other) { + m_vector = other.vector(); + return *this; + } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + EIGEN_DEVICE_FUNC SkewSymmetricMatrix3& operator=(const SkewSymmetricMatrix3& other) { + m_vector = other.vector(); + return *this; + } +#endif + + typedef SkewSymmetricWrapper, SkewSymmetricVectorType>> + InitializeReturnType; + + /** Initializes a skew symmetric matrix with coefficients set to zero */ + EIGEN_DEVICE_FUNC static InitializeReturnType Zero() { return SkewSymmetricVectorType::Zero().asSkewSymmetric(); } + + /** Sets all coefficients to zero. 
*/ + EIGEN_DEVICE_FUNC inline void setZero() { m_vector.setZero(); } +}; + +/** \class SkewSymmetricWrapper + * \ingroup Core_Module + * + * \brief Expression of a skew symmetric matrix + * + * \tparam SkewSymmetricVectorType_ the type of the vector of coefficients + * + * This class is an expression of a skew symmetric matrix, but not storing its own vector of coefficients, + * instead wrapping an existing vector expression. It is the return type of MatrixBase::asSkewSymmetric() + * and most of the time this is the only way that it is used. + * + * \sa class SkewSymmetricMatrix3, class SkewSymmetricBase, MatrixBase::asSkewSymmetric() + */ + +namespace internal { +template +struct traits> { + typedef SkewSymmetricVectorType_ SkewSymmetricVectorType; + typedef typename SkewSymmetricVectorType::Scalar Scalar; + typedef typename SkewSymmetricVectorType::StorageIndex StorageIndex; + typedef SkewSymmetricShape StorageKind; + typedef typename traits::XprKind XprKind; + enum { + RowsAtCompileTime = SkewSymmetricVectorType::SizeAtCompileTime, + ColsAtCompileTime = SkewSymmetricVectorType::SizeAtCompileTime, + MaxRowsAtCompileTime = SkewSymmetricVectorType::MaxSizeAtCompileTime, + MaxColsAtCompileTime = SkewSymmetricVectorType::MaxSizeAtCompileTime, + Flags = (traits::Flags & LvalueBit) | NoPreferredStorageOrderBit + }; +}; +} // namespace internal + +template +class SkewSymmetricWrapper : public SkewSymmetricBase>, + internal::no_assignment_operator { + public: +#ifndef EIGEN_PARSED_BY_DOXYGEN + typedef SkewSymmetricVectorType_ SkewSymmetricVectorType; + typedef SkewSymmetricWrapper Nested; +#endif + + /** Constructor from expression of coefficients to wrap. */ + EIGEN_DEVICE_FUNC explicit inline SkewSymmetricWrapper(SkewSymmetricVectorType& a_vector) : m_vector(a_vector) {} + + /** \returns a const reference to the wrapped expression of coefficients. 
*/ + EIGEN_DEVICE_FUNC const SkewSymmetricVectorType& vector() const { return m_vector; } + + protected: + typename SkewSymmetricVectorType::Nested m_vector; +}; + +/** \returns a pseudo-expression of a skew symmetric matrix with *this as vector of coefficients + * + * \only_for_vectors + * + * \sa class SkewSymmetricWrapper, class SkewSymmetricMatrix3, vector(), isSkewSymmetric() + **/ +template +EIGEN_DEVICE_FUNC inline const SkewSymmetricWrapper MatrixBase::asSkewSymmetric() const { + return SkewSymmetricWrapper(derived()); +} + +/** \returns true if *this is approximately equal to a skew symmetric matrix, + * within the precision given by \a prec. + */ +template +bool MatrixBase::isSkewSymmetric(const RealScalar& prec) const { + if (cols() != rows()) return false; + return (this->transpose() + *this).isZero(prec); +} + +/** \returns the matrix product of \c *this by the skew symmetric matrix \a skew. + */ +template +template +EIGEN_DEVICE_FUNC inline const Product MatrixBase::operator*( + const SkewSymmetricBase& skew) const { + return Product(derived(), skew.derived()); +} + +namespace internal { + +template <> +struct storage_kind_to_shape { + typedef SkewSymmetricShape Shape; +}; + +struct SkewSymmetric2Dense {}; + +template <> +struct AssignmentKind { + typedef SkewSymmetric2Dense Kind; +}; + +// SkewSymmetric matrix to Dense assignment +template +struct Assignment { + EIGEN_DEVICE_FUNC static void run( + DstXprType& dst, const SrcXprType& src, + const internal::assign_op& /*func*/) { + if ((dst.rows() != 3) || (dst.cols() != 3)) { + dst.resize(3, 3); + } + dst.diagonal().setZero(); + const typename SrcXprType::SkewSymmetricVectorType v = src.vector(); + dst(0, 1) = -v(2); + dst(1, 0) = v(2); + dst(0, 2) = v(1); + dst(2, 0) = -v(1); + dst(1, 2) = -v(0); + dst(2, 1) = v(0); + } + EIGEN_DEVICE_FUNC static void run( + DstXprType& dst, const SrcXprType& src, + const internal::add_assign_op& /*func*/) { + dst.vector() += src.vector(); + } + + EIGEN_DEVICE_FUNC 
static void run( + DstXprType& dst, const SrcXprType& src, + const internal::sub_assign_op& /*func*/) { + dst.vector() -= src.vector(); + } +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_SKEWSYMMETRICMATRIX3_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Solve.h b/o-voxel/third_party/eigen/Eigen/src/Core/Solve.h new file mode 100644 index 0000000000000000000000000000000000000000..9f047c1f32f9a7ef5f7298aa1cec828c3022f1b6 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Solve.h @@ -0,0 +1,174 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SOLVE_H +#define EIGEN_SOLVE_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +template +class SolveImpl; + +/** \class Solve + * \ingroup Core_Module + * + * \brief Pseudo expression representing a solving operation + * + * \tparam Decomposition the type of the matrix or decomposition object + * \tparam RhsType the type of the right-hand side + * + * This class represents an expression of A.solve(B) + * and most of the time this is the only way it is used. 
+ * + */ +namespace internal { + +// this solve_traits class permits to determine the evaluation type with respect to storage kind (Dense vs Sparse) +template +struct solve_traits; + +template +struct solve_traits { + typedef typename make_proper_matrix_type::type + PlainObject; +}; + +template +struct traits > + : traits< + typename solve_traits::StorageKind>::PlainObject> { + typedef typename solve_traits::StorageKind>::PlainObject + PlainObject; + typedef typename promote_index_type::type + StorageIndex; + typedef traits BaseTraits; + enum { Flags = BaseTraits::Flags & RowMajorBit, CoeffReadCost = HugeCost }; +}; + +} // namespace internal + +template +class Solve : public SolveImpl::StorageKind> { + public: + typedef typename internal::traits::PlainObject PlainObject; + typedef typename internal::traits::StorageIndex StorageIndex; + + Solve(const Decomposition &dec, const RhsType &rhs) : m_dec(dec), m_rhs(rhs) {} + + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_dec.cols(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_rhs.cols(); } + + EIGEN_DEVICE_FUNC const Decomposition &dec() const { return m_dec; } + EIGEN_DEVICE_FUNC const RhsType &rhs() const { return m_rhs; } + + protected: + const Decomposition &m_dec; + const typename internal::ref_selector::type m_rhs; +}; + +// Specialization of the Solve expression for dense results +template +class SolveImpl : public MatrixBase > { + typedef Solve Derived; + + public: + typedef MatrixBase > Base; + EIGEN_DENSE_PUBLIC_INTERFACE(Derived) + + private: + Scalar coeff(Index row, Index col) const; + Scalar coeff(Index i) const; +}; + +// Generic API dispatcher +template +class SolveImpl : public internal::generic_xpr_base, MatrixXpr, StorageKind>::type { + public: + typedef typename internal::generic_xpr_base, MatrixXpr, StorageKind>::type Base; +}; + +namespace internal { + +// Evaluator of Solve -> eval into a temporary +template +struct evaluator > + : public 
evaluator::PlainObject> { + typedef Solve SolveType; + typedef typename SolveType::PlainObject PlainObject; + typedef evaluator Base; + + enum { Flags = Base::Flags | EvalBeforeNestingBit }; + + EIGEN_DEVICE_FUNC explicit evaluator(const SolveType &solve) : m_result(solve.rows(), solve.cols()) { + internal::construct_at(this, m_result); + solve.dec()._solve_impl(solve.rhs(), m_result); + } + + protected: + PlainObject m_result; +}; + +// Specialization for "dst = dec.solve(rhs)" +// NOTE we need to specialize it for Dense2Dense to avoid ambiguous specialization error and a Sparse2Sparse +// specialization must exist somewhere +template +struct Assignment, internal::assign_op, Dense2Dense> { + typedef Solve SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols); + + src.dec()._solve_impl(src.rhs(), dst); + } +}; + +// Specialization for "dst = dec.transpose().solve(rhs)" +template +struct Assignment, RhsType>, internal::assign_op, + Dense2Dense> { + typedef Solve, RhsType> SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols); + + src.dec().nestedExpression().template _solve_impl_transposed(src.rhs(), dst); + } +}; + +// Specialization for "dst = dec.adjoint().solve(rhs)" +template +struct Assignment< + DstXprType, + Solve, const Transpose >, + RhsType>, + internal::assign_op, Dense2Dense> { + typedef Solve, const Transpose >, + RhsType> + SrcXprType; + static void run(DstXprType &dst, const SrcXprType &src, const internal::assign_op &) { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols); + 
+ src.dec().nestedExpression().nestedExpression().template _solve_impl_transposed(src.rhs(), dst); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_SOLVE_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/SolveTriangular.h b/o-voxel/third_party/eigen/Eigen/src/Core/SolveTriangular.h new file mode 100644 index 0000000000000000000000000000000000000000..488762fa18d49873b0f4e2e0f12c9af87196acdd --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/SolveTriangular.h @@ -0,0 +1,237 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SOLVETRIANGULAR_H +#define EIGEN_SOLVETRIANGULAR_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +// Forward declarations: +// The following two routines are implemented in the products/TriangularSolver*.h files +template +struct triangular_solve_vector; + +template +struct triangular_solve_matrix; + +// small helper struct extracting some traits on the underlying solver operation +template +class trsolve_traits { + private: + enum { RhsIsVectorAtCompileTime = (Side == OnTheLeft ? Rhs::ColsAtCompileTime : Rhs::RowsAtCompileTime) == 1 }; + + public: + enum { + Unrolling = (RhsIsVectorAtCompileTime && Rhs::SizeAtCompileTime != Dynamic && Rhs::SizeAtCompileTime <= 8) + ? CompleteUnrolling + : NoUnrolling, + RhsVectors = RhsIsVectorAtCompileTime ? 
1 : Dynamic + }; +}; + +template ::Unrolling, + int RhsVectors = trsolve_traits::RhsVectors> +struct triangular_solver_selector; + +template +struct triangular_solver_selector { + typedef typename Lhs::Scalar LhsScalar; + typedef typename Rhs::Scalar RhsScalar; + typedef blas_traits LhsProductTraits; + typedef typename LhsProductTraits::ExtractType ActualLhsType; + typedef Map, Aligned> MappedRhs; + static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs) { + ActualLhsType actualLhs = LhsProductTraits::extract(lhs); + + // FIXME find a way to allow an inner stride if packet_traits::size==1 + + bool useRhsDirectly = Rhs::InnerStrideAtCompileTime == 1 || rhs.innerStride() == 1; + + ei_declare_aligned_stack_constructed_variable(RhsScalar, actualRhs, rhs.size(), (useRhsDirectly ? rhs.data() : 0)); + + if (!useRhsDirectly) MappedRhs(actualRhs, rhs.size()) = rhs; + + triangular_solve_vector::run(actualLhs.cols(), + actualLhs.data(), + actualLhs.outerStride(), + actualRhs); + + if (!useRhsDirectly) rhs = MappedRhs(actualRhs, rhs.size()); + } +}; + +// the rhs is a matrix +template +struct triangular_solver_selector { + typedef typename Rhs::Scalar Scalar; + typedef blas_traits LhsProductTraits; + typedef typename LhsProductTraits::DirectLinearAccessType ActualLhsType; + + static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs) { + add_const_on_value_type_t actualLhs = LhsProductTraits::extract(lhs); + + const Index size = lhs.rows(); + const Index othersize = Side == OnTheLeft ? rhs.cols() : rhs.rows(); + + typedef internal::gemm_blocking_space<(Rhs::Flags & RowMajorBit) ? RowMajor : ColMajor, Scalar, Scalar, + Rhs::MaxRowsAtCompileTime, Rhs::MaxColsAtCompileTime, + Lhs::MaxRowsAtCompileTime, 4> + BlockingType; + + // Nothing to solve. 
+ if (actualLhs.size() == 0 || rhs.size() == 0) { + return; + } + + BlockingType blocking(rhs.rows(), rhs.cols(), size, 1, false); + + triangular_solve_matrix::run(size, othersize, &actualLhs.coeffRef(0, 0), + actualLhs.outerStride(), &rhs.coeffRef(0, 0), + rhs.innerStride(), rhs.outerStride(), blocking); + } +}; + +/*************************************************************************** + * meta-unrolling implementation + ***************************************************************************/ + +template +struct triangular_solver_unroller; + +template +struct triangular_solver_unroller { + enum { + IsLower = ((Mode & Lower) == Lower), + DiagIndex = IsLower ? LoopIndex : Size - LoopIndex - 1, + StartIndex = IsLower ? 0 : DiagIndex + 1 + }; + static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs) { + if (LoopIndex > 0) + rhs.coeffRef(DiagIndex) -= lhs.row(DiagIndex) + .template segment(StartIndex) + .transpose() + .cwiseProduct(rhs.template segment(StartIndex)) + .sum(); + + if (!(Mode & UnitDiag)) rhs.coeffRef(DiagIndex) /= lhs.coeff(DiagIndex, DiagIndex); + + triangular_solver_unroller::run(lhs, rhs); + } +}; + +template +struct triangular_solver_unroller { + static EIGEN_DEVICE_FUNC void run(const Lhs&, Rhs&) {} +}; + +template +struct triangular_solver_selector { + static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs) { + triangular_solver_unroller::run(lhs, rhs); + } +}; + +template +struct triangular_solver_selector { + static EIGEN_DEVICE_FUNC void run(const Lhs& lhs, Rhs& rhs) { + Transpose trLhs(lhs); + Transpose trRhs(rhs); + + triangular_solver_unroller, Transpose, + ((Mode & Upper) == Upper ? 
Lower : Upper) | (Mode & UnitDiag), 0, + Rhs::SizeAtCompileTime>::run(trLhs, trRhs); + } +}; + +} // end namespace internal + +/*************************************************************************** + * TriangularView methods + ***************************************************************************/ + +#ifndef EIGEN_PARSED_BY_DOXYGEN +template +template +EIGEN_DEVICE_FUNC void TriangularViewImpl::solveInPlace( + const MatrixBase& _other) const { + OtherDerived& other = _other.const_cast_derived(); + eigen_assert(derived().cols() == derived().rows() && ((Side == OnTheLeft && derived().cols() == other.rows()) || + (Side == OnTheRight && derived().cols() == other.cols()))); + eigen_assert((!(int(Mode) & int(ZeroDiag))) && bool(int(Mode) & (int(Upper) | int(Lower)))); + // If solving for a 0x0 matrix, nothing to do, simply return. + if (derived().cols() == 0) return; + + enum { + copy = (internal::traits::Flags & RowMajorBit) && OtherDerived::IsVectorAtCompileTime && + OtherDerived::SizeAtCompileTime != 1 + }; + typedef std::conditional_t::type, OtherDerived&> + OtherCopy; + OtherCopy otherCopy(other); + + internal::triangular_solver_selector, Side, Mode>::run( + derived().nestedExpression(), otherCopy); + + if (copy) other = otherCopy; +} + +template +template +const internal::triangular_solve_retval, Other> +TriangularViewImpl::solve(const MatrixBase& other) const { + return internal::triangular_solve_retval(derived(), other.derived()); +} +#endif + +namespace internal { + +template +struct traits > { + typedef typename internal::plain_matrix_type_column_major::type ReturnType; +}; + +template +struct triangular_solve_retval : public ReturnByValue > { + typedef remove_all_t RhsNestedCleaned; + typedef ReturnByValue Base; + + triangular_solve_retval(const TriangularType& tri, const Rhs& rhs) : m_triangularMatrix(tri), m_rhs(rhs) {} + + constexpr Index rows() const noexcept { return m_rhs.rows(); } + constexpr Index cols() const noexcept { return m_rhs.cols(); 
} + + template + inline void evalTo(Dest& dst) const { + if (!is_same_dense(dst, m_rhs)) dst = m_rhs; + m_triangularMatrix.template solveInPlace(dst); + } + + protected: + const TriangularType& m_triangularMatrix; + typename Rhs::Nested m_rhs; +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_SOLVETRIANGULAR_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/SolverBase.h b/o-voxel/third_party/eigen/Eigen/src/Core/SolverBase.h new file mode 100644 index 0000000000000000000000000000000000000000..702fe8ccd860f9f38ab09a22c26a9a6c010748cc --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/SolverBase.h @@ -0,0 +1,167 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SOLVERBASE_H +#define EIGEN_SOLVERBASE_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template +struct solve_assertion { + template + static void run(const Derived& solver, const Rhs& b) { + solver.template _check_solve_assertion(b); + } +}; + +template +struct solve_assertion> { + typedef Transpose type; + + template + static void run(const type& transpose, const Rhs& b) { + internal::solve_assertion>::template run(transpose.nestedExpression(), b); + } +}; + +template +struct solve_assertion, const Transpose>> { + typedef CwiseUnaryOp, const Transpose> type; + + template + static void run(const type& adjoint, const Rhs& b) { + internal::solve_assertion>>::template run( + adjoint.nestedExpression(), b); + } +}; +} // end namespace internal + +/** \class SolverBase + * \brief A base class for matrix decomposition and solvers + * + * \tparam Derived the actual type of the decomposition/solver. 
+ * + * Any matrix decomposition inheriting this base class provide the following API: + * + * \code + * MatrixType A, b, x; + * DecompositionType dec(A); + * x = dec.solve(b); // solve A * x = b + * x = dec.transpose().solve(b); // solve A^T * x = b + * x = dec.adjoint().solve(b); // solve A' * x = b + * \endcode + * + * \warning Currently, any other usage of transpose() and adjoint() are not supported and will produce compilation + * errors. + * + * \sa class PartialPivLU, class FullPivLU, class HouseholderQR, class ColPivHouseholderQR, class FullPivHouseholderQR, + * class CompleteOrthogonalDecomposition, class LLT, class LDLT, class SVDBase + */ +template +class SolverBase : public EigenBase { + public: + typedef EigenBase Base; + typedef typename internal::traits::Scalar Scalar; + typedef Scalar CoeffReturnType; + + template + friend struct internal::solve_assertion; + + ComputationInfo info() const { + // CRTP static dispatch: Calls the 'info()' method on the derived class. + // Derived must implement 'ComputationInfo info() const'. + // If not implemented, name lookup falls back to this base method, causing + // infinite recursion (detectable by -Winfinite-recursion). + return derived().info(); + } + + enum { + RowsAtCompileTime = internal::traits::RowsAtCompileTime, + ColsAtCompileTime = internal::traits::ColsAtCompileTime, + SizeAtCompileTime = (internal::size_of_xpr_at_compile_time::ret), + MaxRowsAtCompileTime = internal::traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = internal::traits::MaxColsAtCompileTime, + MaxSizeAtCompileTime = internal::size_at_compile_time(internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime), + IsVectorAtCompileTime = + internal::traits::MaxRowsAtCompileTime == 1 || internal::traits::MaxColsAtCompileTime == 1, + NumDimensions = int(MaxSizeAtCompileTime) == 1 ? 0 + : bool(IsVectorAtCompileTime) ? 
1 + : 2 + }; + + /** Default constructor */ + SolverBase() {} + + ~SolverBase() {} + + using Base::derived; + + /** \returns an expression of the solution x of \f$ A x = b \f$ using the current decomposition of A. + */ + template + inline const Solve solve(const MatrixBase& b) const { + internal::solve_assertion>::template run(derived(), b); + return Solve(derived(), b.derived()); + } + + /** \internal the return type of transpose() */ + typedef Transpose ConstTransposeReturnType; + /** \returns an expression of the transpose of the factored matrix. + * + * A typical usage is to solve for the transposed problem A^T x = b: + * \code x = dec.transpose().solve(b); \endcode + * + * \sa adjoint(), solve() + */ + inline const ConstTransposeReturnType transpose() const { return ConstTransposeReturnType(derived()); } + + /** \internal the return type of adjoint() */ + typedef std::conditional_t::IsComplex, + CwiseUnaryOp, const ConstTransposeReturnType>, + const ConstTransposeReturnType> + AdjointReturnType; + /** \returns an expression of the adjoint of the factored matrix + * + * A typical usage is to solve for the adjoint problem A' x = b: + * \code x = dec.adjoint().solve(b); \endcode + * + * For real scalar types, this function is equivalent to transpose(). + * + * \sa transpose(), solve() + */ + inline const AdjointReturnType adjoint() const { return AdjointReturnType(derived().transpose()); } + + protected: + /** \internal Assertion-based check that the solver is initialized and that the number of rows of \a b matches cols() when Transpose_ is set, and rows() otherwise. */ template + void _check_solve_assertion(const Rhs& b) const { + EIGEN_ONLY_USED_FOR_DEBUG(b); + eigen_assert(derived().m_isInitialized && "Solver is not initialized."); + eigen_assert((Transpose_ ? 
derived().cols() : derived().rows()) == b.rows() && + "SolverBase::solve(): invalid number of rows of the right hand side matrix b"); + } +}; + +namespace internal { + +template +struct generic_xpr_base { + typedef SolverBase type; +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_SOLVERBASE_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/StableNorm.h b/o-voxel/third_party/eigen/Eigen/src/Core/StableNorm.h new file mode 100644 index 0000000000000000000000000000000000000000..23485ff37141829e2a24f53fbb788280f521628a --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/StableNorm.h @@ -0,0 +1,217 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_STABLENORM_H +#define EIGEN_STABLENORM_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template +inline void stable_norm_kernel(const ExpressionType& bl, Scalar& ssq, Scalar& scale, Scalar& invScale) { + Scalar maxCoeff = bl.cwiseAbs().maxCoeff(); + + if (maxCoeff > scale) { + ssq = ssq * numext::abs2(scale / maxCoeff); + Scalar tmp = Scalar(1) / maxCoeff; + if (tmp > NumTraits::highest()) { + invScale = NumTraits::highest(); + scale = Scalar(1) / invScale; + } else if (maxCoeff > NumTraits::highest()) // we got a INF + { + invScale = Scalar(1); + scale = maxCoeff; + } else { + scale = maxCoeff; + invScale = tmp; + } + } else if (maxCoeff != maxCoeff) // we got a NaN + { + scale = maxCoeff; + } + + // TODO if the maxCoeff is much much smaller than the current scale, + // then we can neglect this sub vector + if (scale > Scalar(0)) // if scale==0, then bl is 0 + ssq += (bl * invScale).squaredNorm(); +} + 
+/** \internal Feeds \a vec to stable_norm_kernel in segments of blockSize (4096) coefficients, then once more for the remaining tail, accumulating into \a ssq, \a scale and \a invScale. */ template +void stable_norm_impl_inner_step(const VectorType& vec, RealScalar& ssq, RealScalar& scale, RealScalar& invScale) { + const Index blockSize = 4096; + + Index n = vec.size(); + Index blockEnd = numext::round_down(n, blockSize); + for (Index i = 0; i < blockEnd; i += blockSize) { + internal::stable_norm_kernel(vec.template segment(i), ssq, scale, invScale); + } + if (n > blockEnd) { + internal::stable_norm_kernel(vec.tail(n - blockEnd), ssq, scale, invScale); + } +} + +template +typename VectorType::RealScalar stable_norm_impl(const VectorType& vec, + std::enable_if_t* = 0) { + using std::abs; + using std::sqrt; + + Index n = vec.size(); + if (EIGEN_PREDICT_FALSE(n == 1)) return abs(vec.coeff(0)); + + typedef typename VectorType::RealScalar RealScalar; + RealScalar scale(0); + RealScalar invScale(1); + RealScalar ssq(0); // sum of squares + + stable_norm_impl_inner_step(vec, ssq, scale, invScale); + + return scale * sqrt(ssq); +} + +template +typename MatrixType::RealScalar stable_norm_impl(const MatrixType& mat, + std::enable_if_t* = 0) { + using std::sqrt; + + typedef typename MatrixType::RealScalar RealScalar; + RealScalar scale(0); + RealScalar invScale(1); + RealScalar ssq(0); // sum of squares + + for (Index j = 0; j < mat.outerSize(); ++j) stable_norm_impl_inner_step(mat.innerVector(j), ssq, scale, invScale); + return scale * sqrt(ssq); +} + +template +inline typename NumTraits::Scalar>::Real blueNorm_impl(const EigenBase& _vec) { + typedef typename Derived::RealScalar RealScalar; + using std::abs; + using std::pow; + using std::sqrt; + + // This program calculates the machine-dependent constants + // b1, b2, s1m, s2m, relerr, overfl + // from the "basic" machine-dependent numbers + // nbig, ibeta, it, iemin, iemax, rbig. + // The following define the basic machine-dependent constants. + // For portability, the PORT subprograms "i1mach" and "r1mach" + // are used. 
For any specific computer, each of the assignment + // statements can be replaced + static const int ibeta = std::numeric_limits::radix; // base for floating-point numbers + static const int it = NumTraits::digits(); // number of base-beta digits in mantissa + static const int iemin = NumTraits::min_exponent(); // minimum exponent + static const int iemax = NumTraits::max_exponent(); // maximum exponent + static const RealScalar rbig = NumTraits::highest(); // largest floating-point number + static const RealScalar b1 = + RealScalar(pow(RealScalar(ibeta), RealScalar(-((1 - iemin) / 2)))); // lower boundary of midrange + static const RealScalar b2 = + RealScalar(pow(RealScalar(ibeta), RealScalar((iemax + 1 - it) / 2))); // upper boundary of midrange + static const RealScalar s1m = + RealScalar(pow(RealScalar(ibeta), RealScalar((2 - iemin) / 2))); // scaling factor for lower range + static const RealScalar s2m = + RealScalar(pow(RealScalar(ibeta), RealScalar(-((iemax + it) / 2)))); // scaling factor for upper range + static const RealScalar eps = RealScalar(pow(double(ibeta), 1 - it)); + static const RealScalar relerr = sqrt(eps); // tolerance for neglecting asml + + const Derived& vec(_vec.derived()); + Index n = vec.size(); + RealScalar ab2 = b2 / RealScalar(n); + RealScalar asml = RealScalar(0); + RealScalar amed = RealScalar(0); + RealScalar abig = RealScalar(0); + + for (Index j = 0; j < vec.outerSize(); ++j) { + for (typename Derived::InnerIterator iter(vec, j); iter; ++iter) { + RealScalar ax = abs(iter.value()); + if (ax > ab2) + abig += numext::abs2(ax * s2m); + else if (ax < b1) + asml += numext::abs2(ax * s1m); + else + amed += numext::abs2(ax); + } + } + if (amed != amed) return amed; // we got a NaN + if (abig > RealScalar(0)) { + abig = sqrt(abig); + if (abig > rbig) // overflow, or *this contains INF values + return abig; // return INF + if (amed > RealScalar(0)) { + abig = abig / s2m; + amed = sqrt(amed); + } else + return abig / s2m; + } else if 
(asml > RealScalar(0)) { + if (amed > RealScalar(0)) { + abig = sqrt(amed); + amed = sqrt(asml) / s1m; + } else + return sqrt(asml) / s1m; + } else + return sqrt(amed); + asml = numext::mini(abig, amed); + abig = numext::maxi(abig, amed); + if (asml <= abig * relerr) + return abig; + else + return abig * sqrt(RealScalar(1) + numext::abs2(asml / abig)); +} + +} // end namespace internal + +/** \returns the \em l2 norm of \c *this avoiding underflow and overflow. + * This version uses a blockwise two-pass algorithm: + * 1 - find the absolute largest coefficient \c s + * 2 - compute \f$ s \Vert \frac{*this}{s} \Vert \f$ in a standard way + * + * For architecture/scalar types supporting vectorization, this version + * is faster than blueNorm(). Otherwise blueNorm() is much faster. + * + * \sa norm(), blueNorm(), hypotNorm() + */ +template +inline typename NumTraits::Scalar>::Real MatrixBase::stableNorm() const { + return internal::stable_norm_impl(derived()); +} + +/** \returns the \em l2 norm of \c *this using Blue's algorithm. + * A Portable Fortran Program to Find the Euclidean Norm of a Vector, + * ACM TOMS, Vol 4, Issue 1, 1978. + * + * For architecture/scalar types without vectorization, this version + * is much faster than stableNorm(). Otherwise stableNorm() is faster. + * + * \sa norm(), stableNorm(), hypotNorm() + */ +template +inline typename NumTraits::Scalar>::Real MatrixBase::blueNorm() const { + return internal::blueNorm_impl(*this); +} + +/** \returns the \em l2 norm of \c *this avoiding underflow and overflow. + * This version uses a concatenation of hypot() calls, and it is very slow. 
+ * + * \sa norm(), stableNorm() + */ +template +inline typename NumTraits::Scalar>::Real MatrixBase::hypotNorm() const { + if (size() == 1) + return numext::abs(coeff(0, 0)); + else + return this->cwiseAbs().redux(internal::scalar_hypot_op()); +} + +} // end namespace Eigen + +#endif // EIGEN_STABLENORM_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/StlIterators.h b/o-voxel/third_party/eigen/Eigen/src/Core/StlIterators.h new file mode 100644 index 0000000000000000000000000000000000000000..6e1027cb48006463a621dafc3b11d6e998f6893b --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/StlIterators.h @@ -0,0 +1,619 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2018 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_STLITERATORS_H +#define EIGEN_STLITERATORS_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template +struct indexed_based_stl_iterator_traits; + +template +class indexed_based_stl_iterator_base { + protected: + typedef indexed_based_stl_iterator_traits traits; + typedef typename traits::XprType XprType; + typedef indexed_based_stl_iterator_base non_const_iterator; + typedef indexed_based_stl_iterator_base const_iterator; + typedef std::conditional_t::value, non_const_iterator, const_iterator> other_iterator; + // NOTE: in C++03 we cannot declare friend classes through typedefs because we need to write friend class: + friend class indexed_based_stl_iterator_base; + friend class indexed_based_stl_iterator_base; + + public: + typedef Index difference_type; + typedef std::random_access_iterator_tag iterator_category; + + indexed_based_stl_iterator_base() noexcept : mp_xpr(0), m_index(0) {} + 
indexed_based_stl_iterator_base(XprType& xpr, Index index) noexcept : mp_xpr(&xpr), m_index(index) {} + + indexed_based_stl_iterator_base(const non_const_iterator& other) noexcept + : mp_xpr(other.mp_xpr), m_index(other.m_index) {} + + indexed_based_stl_iterator_base& operator=(const non_const_iterator& other) { + mp_xpr = other.mp_xpr; + m_index = other.m_index; + return *this; + } + + Derived& operator++() { + ++m_index; + return derived(); + } + Derived& operator--() { + --m_index; + return derived(); + } + + Derived operator++(int) { + Derived prev(derived()); + operator++(); + return prev; + } + Derived operator--(int) { + Derived prev(derived()); + operator--(); + return prev; + } + + friend Derived operator+(const indexed_based_stl_iterator_base& a, Index b) { + Derived ret(a.derived()); + ret += b; + return ret; + } + friend Derived operator-(const indexed_based_stl_iterator_base& a, Index b) { + Derived ret(a.derived()); + ret -= b; + return ret; + } + friend Derived operator+(Index a, const indexed_based_stl_iterator_base& b) { + Derived ret(b.derived()); + ret += a; + return ret; + } + friend Derived operator-(Index a, const indexed_based_stl_iterator_base& b) { + Derived ret(b.derived()); + ret -= a; + return ret; + } + + Derived& operator+=(Index b) { + m_index += b; + return derived(); + } + Derived& operator-=(Index b) { + m_index -= b; + return derived(); + } + + difference_type operator-(const indexed_based_stl_iterator_base& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index - other.m_index; + } + + difference_type operator-(const other_iterator& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index - other.m_index; + } + + bool operator==(const indexed_based_stl_iterator_base& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index == other.m_index; + } + bool operator!=(const indexed_based_stl_iterator_base& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index != 
other.m_index; + } + bool operator<(const indexed_based_stl_iterator_base& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index < other.m_index; + } + bool operator<=(const indexed_based_stl_iterator_base& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index <= other.m_index; + } + bool operator>(const indexed_based_stl_iterator_base& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index > other.m_index; + } + bool operator>=(const indexed_based_stl_iterator_base& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index >= other.m_index; + } + + bool operator==(const other_iterator& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index == other.m_index; + } + bool operator!=(const other_iterator& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index != other.m_index; + } + bool operator<(const other_iterator& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index < other.m_index; + } + bool operator<=(const other_iterator& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index <= other.m_index; + } + bool operator>(const other_iterator& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index > other.m_index; + } + bool operator>=(const other_iterator& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index >= other.m_index; + } + + protected: + Derived& derived() { return static_cast(*this); } + const Derived& derived() const { return static_cast(*this); } + + XprType* mp_xpr; + Index m_index; +}; + +template +class indexed_based_stl_reverse_iterator_base { + protected: + typedef indexed_based_stl_iterator_traits traits; + typedef typename traits::XprType XprType; + typedef indexed_based_stl_reverse_iterator_base non_const_iterator; + typedef indexed_based_stl_reverse_iterator_base const_iterator; + typedef std::conditional_t::value, non_const_iterator, const_iterator> other_iterator; + // NOTE: 
in C++03 we cannot declare friend classes through typedefs because we need to write friend class: + friend class indexed_based_stl_reverse_iterator_base; + friend class indexed_based_stl_reverse_iterator_base; + + public: + typedef Index difference_type; + typedef std::random_access_iterator_tag iterator_category; + + indexed_based_stl_reverse_iterator_base() : mp_xpr(0), m_index(0) {} + indexed_based_stl_reverse_iterator_base(XprType& xpr, Index index) : mp_xpr(&xpr), m_index(index) {} + + indexed_based_stl_reverse_iterator_base(const non_const_iterator& other) + : mp_xpr(other.mp_xpr), m_index(other.m_index) {} + + indexed_based_stl_reverse_iterator_base& operator=(const non_const_iterator& other) { + mp_xpr = other.mp_xpr; + m_index = other.m_index; + return *this; + } + + Derived& operator++() { + --m_index; + return derived(); + } + Derived& operator--() { + ++m_index; + return derived(); + } + + Derived operator++(int) { + Derived prev(derived()); + operator++(); + return prev; + } + Derived operator--(int) { + Derived prev(derived()); + operator--(); + return prev; + } + + friend Derived operator+(const indexed_based_stl_reverse_iterator_base& a, Index b) { + Derived ret(a.derived()); + ret += b; + return ret; + } + friend Derived operator-(const indexed_based_stl_reverse_iterator_base& a, Index b) { + Derived ret(a.derived()); + ret -= b; + return ret; + } + friend Derived operator+(Index a, const indexed_based_stl_reverse_iterator_base& b) { + Derived ret(b.derived()); + ret += a; + return ret; + } + friend Derived operator-(Index a, const indexed_based_stl_reverse_iterator_base& b) { + Derived ret(b.derived()); + ret -= a; + return ret; + } + + Derived& operator+=(Index b) { + m_index -= b; + return derived(); + } + Derived& operator-=(Index b) { + m_index += b; + return derived(); + } + + difference_type operator-(const indexed_based_stl_reverse_iterator_base& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return other.m_index - m_index; + } + 
+ difference_type operator-(const other_iterator& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return other.m_index - m_index; + } + + bool operator==(const indexed_based_stl_reverse_iterator_base& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index == other.m_index; + } + bool operator!=(const indexed_based_stl_reverse_iterator_base& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index != other.m_index; + } + bool operator<(const indexed_based_stl_reverse_iterator_base& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index > other.m_index; + } + bool operator<=(const indexed_based_stl_reverse_iterator_base& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index >= other.m_index; + } + bool operator>(const indexed_based_stl_reverse_iterator_base& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index < other.m_index; + } + bool operator>=(const indexed_based_stl_reverse_iterator_base& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index <= other.m_index; + } + + bool operator==(const other_iterator& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index == other.m_index; + } + bool operator!=(const other_iterator& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index != other.m_index; + } + bool operator<(const other_iterator& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index > other.m_index; + } + bool operator<=(const other_iterator& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index >= other.m_index; + } + bool operator>(const other_iterator& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index < other.m_index; + } + bool operator>=(const other_iterator& other) const { + eigen_assert(mp_xpr == other.mp_xpr); + return m_index <= other.m_index; + } + + protected: + Derived& derived() { return static_cast(*this); } + const Derived& derived() const { 
return static_cast(*this); } + + XprType* mp_xpr; + Index m_index; +}; + +template +class pointer_based_stl_iterator { + enum { is_lvalue = internal::is_lvalue::value }; + typedef pointer_based_stl_iterator> non_const_iterator; + typedef pointer_based_stl_iterator> const_iterator; + typedef std::conditional_t::value, non_const_iterator, const_iterator> other_iterator; + // NOTE: in C++03 we cannot declare friend classes through typedefs because we need to write friend class: + friend class pointer_based_stl_iterator>; + friend class pointer_based_stl_iterator>; + + public: + typedef Index difference_type; + typedef typename XprType::Scalar value_type; +#if EIGEN_COMP_CXXVER >= 20 && defined(__cpp_lib_concepts) && __cpp_lib_concepts >= 202002L + typedef std::conditional_t + iterator_category; +#else + typedef std::random_access_iterator_tag iterator_category; +#endif + typedef std::conditional_t pointer; + typedef std::conditional_t reference; + + pointer_based_stl_iterator() noexcept : m_ptr(0) {} + pointer_based_stl_iterator(XprType& xpr, Index index) noexcept : m_incr(xpr.innerStride()) { + m_ptr = xpr.data() + index * m_incr.value(); + } + + pointer_based_stl_iterator(const non_const_iterator& other) noexcept : m_ptr(other.m_ptr), m_incr(other.m_incr) {} + + pointer_based_stl_iterator& operator=(const non_const_iterator& other) noexcept { + m_ptr = other.m_ptr; + m_incr.setValue(other.m_incr); + return *this; + } + + reference operator*() const { return *m_ptr; } + reference operator[](Index i) const { return *(m_ptr + i * m_incr.value()); } + pointer operator->() const { return m_ptr; } + + pointer_based_stl_iterator& operator++() { + m_ptr += m_incr.value(); + return *this; + } + pointer_based_stl_iterator& operator--() { + m_ptr -= m_incr.value(); + return *this; + } + + pointer_based_stl_iterator operator++(int) { + pointer_based_stl_iterator prev(*this); + operator++(); + return prev; + } + pointer_based_stl_iterator operator--(int) { + 
pointer_based_stl_iterator prev(*this); + operator--(); + return prev; + } + + friend pointer_based_stl_iterator operator+(const pointer_based_stl_iterator& a, Index b) { + pointer_based_stl_iterator ret(a); + ret += b; + return ret; + } + friend pointer_based_stl_iterator operator-(const pointer_based_stl_iterator& a, Index b) { + pointer_based_stl_iterator ret(a); + ret -= b; + return ret; + } + friend pointer_based_stl_iterator operator+(Index a, const pointer_based_stl_iterator& b) { + pointer_based_stl_iterator ret(b); + ret += a; + return ret; + } + friend pointer_based_stl_iterator operator-(Index a, const pointer_based_stl_iterator& b) { + pointer_based_stl_iterator ret(b); + ret -= a; + return ret; + } + + pointer_based_stl_iterator& operator+=(Index b) { + m_ptr += b * m_incr.value(); + return *this; + } + pointer_based_stl_iterator& operator-=(Index b) { + m_ptr -= b * m_incr.value(); + return *this; + } + + difference_type operator-(const pointer_based_stl_iterator& other) const { + return (m_ptr - other.m_ptr) / m_incr.value(); + } + + difference_type operator-(const other_iterator& other) const { return (m_ptr - other.m_ptr) / m_incr.value(); } + + bool operator==(const pointer_based_stl_iterator& other) const { return m_ptr == other.m_ptr; } + bool operator!=(const pointer_based_stl_iterator& other) const { return m_ptr != other.m_ptr; } + bool operator<(const pointer_based_stl_iterator& other) const { return m_ptr < other.m_ptr; } + bool operator<=(const pointer_based_stl_iterator& other) const { return m_ptr <= other.m_ptr; } + bool operator>(const pointer_based_stl_iterator& other) const { return m_ptr > other.m_ptr; } + bool operator>=(const pointer_based_stl_iterator& other) const { return m_ptr >= other.m_ptr; } + + bool operator==(const other_iterator& other) const { return m_ptr == other.m_ptr; } + bool operator!=(const other_iterator& other) const { return m_ptr != other.m_ptr; } + bool operator<(const other_iterator& other) const { return 
m_ptr < other.m_ptr; } + bool operator<=(const other_iterator& other) const { return m_ptr <= other.m_ptr; } + bool operator>(const other_iterator& other) const { return m_ptr > other.m_ptr; } + bool operator>=(const other_iterator& other) const { return m_ptr >= other.m_ptr; } + + protected: + pointer m_ptr; + internal::variable_if_dynamic m_incr; +}; + +template +struct indexed_based_stl_iterator_traits> { + typedef XprType_ XprType; + typedef generic_randaccess_stl_iterator> non_const_iterator; + typedef generic_randaccess_stl_iterator> const_iterator; +}; + +template +class generic_randaccess_stl_iterator + : public indexed_based_stl_iterator_base> { + public: + typedef typename XprType::Scalar value_type; + + protected: + enum { + has_direct_access = (internal::traits::Flags & DirectAccessBit) ? 1 : 0, + is_lvalue = internal::is_lvalue::value + }; + + typedef indexed_based_stl_iterator_base Base; + using Base::m_index; + using Base::mp_xpr; + + // TODO currently const Transpose/Reshape expressions never returns const references, + // so lets return by value too. 
+ // typedef std::conditional_t read_only_ref_t; + typedef const value_type read_only_ref_t; + + public: + typedef std::conditional_t pointer; + typedef std::conditional_t reference; + + generic_randaccess_stl_iterator() : Base() {} + generic_randaccess_stl_iterator(XprType& xpr, Index index) : Base(xpr, index) {} + generic_randaccess_stl_iterator(const typename Base::non_const_iterator& other) : Base(other) {} + using Base::operator=; + + reference operator*() const { return (*mp_xpr)(m_index); } + reference operator[](Index i) const { return (*mp_xpr)(m_index + i); } + pointer operator->() const { return &((*mp_xpr)(m_index)); } +}; + +template +struct indexed_based_stl_iterator_traits> { + typedef XprType_ XprType; + typedef subvector_stl_iterator, Direction> non_const_iterator; + typedef subvector_stl_iterator, Direction> const_iterator; +}; + +template +class subvector_stl_iterator : public indexed_based_stl_iterator_base> { + protected: + enum { is_lvalue = internal::is_lvalue::value }; + + typedef indexed_based_stl_iterator_base Base; + using Base::m_index; + using Base::mp_xpr; + + typedef std::conditional_t SubVectorType; + typedef std::conditional_t + ConstSubVectorType; + + public: + typedef std::conditional_t reference; + typedef typename reference::PlainObject value_type; + + private: + class subvector_stl_iterator_ptr { + public: + subvector_stl_iterator_ptr(const reference& subvector) : m_subvector(subvector) {} + reference* operator->() { return &m_subvector; } + + private: + reference m_subvector; + }; + + public: + typedef subvector_stl_iterator_ptr pointer; + + subvector_stl_iterator() : Base() {} + subvector_stl_iterator(XprType& xpr, Index index) : Base(xpr, index) {} + + reference operator*() const { return (*mp_xpr).template subVector(m_index); } + reference operator[](Index i) const { return (*mp_xpr).template subVector(m_index + i); } + pointer operator->() const { return (*mp_xpr).template subVector(m_index); } +}; + +template +struct 
indexed_based_stl_iterator_traits> { + typedef XprType_ XprType; + typedef subvector_stl_reverse_iterator, Direction> non_const_iterator; + typedef subvector_stl_reverse_iterator, Direction> const_iterator; +}; + +template +class subvector_stl_reverse_iterator + : public indexed_based_stl_reverse_iterator_base> { + protected: + enum { is_lvalue = internal::is_lvalue::value }; + + typedef indexed_based_stl_reverse_iterator_base Base; + using Base::m_index; + using Base::mp_xpr; + + typedef std::conditional_t SubVectorType; + typedef std::conditional_t + ConstSubVectorType; + + public: + typedef std::conditional_t reference; + typedef typename reference::PlainObject value_type; + + private: + class subvector_stl_reverse_iterator_ptr { + public: + subvector_stl_reverse_iterator_ptr(const reference& subvector) : m_subvector(subvector) {} + reference* operator->() { return &m_subvector; } + + private: + reference m_subvector; + }; + + public: + typedef subvector_stl_reverse_iterator_ptr pointer; + + subvector_stl_reverse_iterator() : Base() {} + subvector_stl_reverse_iterator(XprType& xpr, Index index) : Base(xpr, index) {} + + reference operator*() const { return (*mp_xpr).template subVector(m_index); } + reference operator[](Index i) const { return (*mp_xpr).template subVector(m_index + i); } + pointer operator->() const { return (*mp_xpr).template subVector(m_index); } +}; + +} // namespace internal + +/** returns an iterator to the first element of the 1D vector or array + * \only_for_vectors + * \sa end(), cbegin() + */ +template +inline typename DenseBase::iterator DenseBase::begin() { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); + return iterator(derived(), 0); +} + +/** const version of begin() */ +template +inline typename DenseBase::const_iterator DenseBase::begin() const { + return cbegin(); +} + +/** returns a read-only const_iterator to the first element of the 1D vector or array + * \only_for_vectors + * \sa cend(), begin() + */ +template +inline typename 
DenseBase::const_iterator DenseBase::cbegin() const { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); + return const_iterator(derived(), 0); +} + +/** returns an iterator to the element following the last element of the 1D vector or array + * \only_for_vectors + * \sa begin(), cend() + */ +template +inline typename DenseBase::iterator DenseBase::end() { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); + return iterator(derived(), size()); +} + +/** const version of end() */ +template +inline typename DenseBase::const_iterator DenseBase::end() const { + return cend(); +} + +/** returns a read-only const_iterator to the element following the last element of the 1D vector or array + * \only_for_vectors + * \sa begin(), cend() + */ +template +inline typename DenseBase::const_iterator DenseBase::cend() const { + EIGEN_STATIC_ASSERT_VECTOR_ONLY(Derived); + return const_iterator(derived(), size()); +} + +} // namespace Eigen + +#endif // EIGEN_STLITERATORS_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Stride.h b/o-voxel/third_party/eigen/Eigen/src/Core/Stride.h new file mode 100644 index 0000000000000000000000000000000000000000..5ffd34fddee0ef44465e38af6e2cc42b79e1fb8d --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Stride.h @@ -0,0 +1,114 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_STRIDE_H +#define EIGEN_STRIDE_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +/** \class Stride + * \ingroup Core_Module + * + * \brief Holds strides information for Map + * + * This class holds the strides information for mapping arrays with strides with class Map. 
+ * + * It holds two values: the inner stride and the outer stride. + * + * The inner stride is the pointer increment between two consecutive entries within a given row of a + * row-major matrix or within a given column of a column-major matrix. + * + * The outer stride is the pointer increment between two consecutive rows of a row-major matrix or + * between two consecutive columns of a column-major matrix. + * + * These two values can be passed either at compile-time as template parameters, or at runtime as + * arguments to the constructor. + * + * Indeed, this class takes two template parameters: + * \tparam OuterStrideAtCompileTime_ the outer stride, or Dynamic if you want to specify it at runtime. + * \tparam InnerStrideAtCompileTime_ the inner stride, or Dynamic if you want to specify it at runtime. + * + * Here is an example: + * \include Map_general_stride.cpp + * Output: \verbinclude Map_general_stride.out + * + * Both strides can be negative. However, a negative stride of -1 cannot be specified at compile time + * because of the ambiguity with Dynamic which is defined to -1 (historically, negative strides were + * not allowed). + * + * Note that for compile-time vectors (ColsAtCompileTime==1 or RowsAtCompile==1), + * the inner stride is the pointer increment between two consecutive elements, + * regardless of storage layout. + * + * \sa class InnerStride, class OuterStride, \ref TopicStorageOrders + */ +template +class Stride { + public: + typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 + enum { InnerStrideAtCompileTime = InnerStrideAtCompileTime_, OuterStrideAtCompileTime = OuterStrideAtCompileTime_ }; + + /** Default constructor, for use when strides are fixed at compile time */ + EIGEN_DEVICE_FUNC Stride() : m_outer(OuterStrideAtCompileTime), m_inner(InnerStrideAtCompileTime) { + // FIXME: for Eigen 4 we should use DynamicIndex instead of Dynamic. 
+ // FIXME: for Eigen 4 we should also unify this API with fix<> + eigen_assert(InnerStrideAtCompileTime != Dynamic && OuterStrideAtCompileTime != Dynamic); + } + + /** Constructor allowing to pass the strides at runtime */ + EIGEN_DEVICE_FUNC Stride(Index outerStride, Index innerStride) : m_outer(outerStride), m_inner(innerStride) {} + + /** Copy constructor */ + EIGEN_DEVICE_FUNC Stride(const Stride& other) : m_outer(other.outer()), m_inner(other.inner()) {} + + /** Copy assignment operator */ + EIGEN_DEVICE_FUNC Stride& operator=(const Stride& other) { + m_outer.setValue(other.outer()); + m_inner.setValue(other.inner()); + return *this; + } + + /** \returns the outer stride */ + EIGEN_DEVICE_FUNC constexpr Index outer() const { return m_outer.value(); } + /** \returns the inner stride */ + EIGEN_DEVICE_FUNC constexpr Index inner() const { return m_inner.value(); } + + protected: + internal::variable_if_dynamic m_outer; + internal::variable_if_dynamic m_inner; +}; + +/** \brief Convenience specialization of Stride to specify only an inner stride + * See class Map for some examples */ +template +class InnerStride : public Stride<0, Value> { + typedef Stride<0, Value> Base; + + public: + EIGEN_DEVICE_FUNC InnerStride() : Base() {} + EIGEN_DEVICE_FUNC InnerStride(Index v) : Base(0, v) {} // FIXME making this explicit could break valid code +}; + +/** \brief Convenience specialization of Stride to specify only an outer stride + * See class Map for some examples */ +template +class OuterStride : public Stride { + typedef Stride Base; + + public: + EIGEN_DEVICE_FUNC OuterStride() : Base() {} + EIGEN_DEVICE_FUNC OuterStride(Index v) : Base(v, 0) {} // FIXME making this explicit could break valid code +}; + +} // end namespace Eigen + +#endif // EIGEN_STRIDE_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Swap.h b/o-voxel/third_party/eigen/Eigen/src/Core/Swap.h new file mode 100644 index 
0000000000000000000000000000000000000000..369133be075dfac2d542696ee3e558dea5d39f54 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Swap.h @@ -0,0 +1,99 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_SWAP_H +#define EIGEN_SWAP_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +// Overload default assignPacket behavior for swapping them +template +class generic_dense_assignment_kernel, Specialized> + : public generic_dense_assignment_kernel, BuiltIn> { + protected: + typedef generic_dense_assignment_kernel, BuiltIn> + Base; + using Base::m_dst; + using Base::m_functor; + using Base::m_src; + + public: + typedef typename Base::Scalar Scalar; + typedef typename Base::DstXprType DstXprType; + typedef swap_assign_op Functor; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE generic_dense_assignment_kernel(DstEvaluatorTypeT &dst, + const SrcEvaluatorTypeT &src, + const Functor &func, DstXprType &dstExpr) + : Base(dst, src, func, dstExpr) {} + + template + EIGEN_STRONG_INLINE void assignPacket(Index row, Index col) { + PacketType tmp = m_src.template packet(row, col); + const_cast(m_src).template writePacket( + row, col, m_dst.template packet(row, col)); + m_dst.template writePacket(row, col, tmp); + } + + template + EIGEN_STRONG_INLINE void assignPacket(Index index) { + PacketType tmp = m_src.template packet(index); + const_cast(m_src).template writePacket( + index, m_dst.template packet(index)); + m_dst.template writePacket(index, tmp); + } + + // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I + // mean no CRTP (Gael) + template 
+ EIGEN_STRONG_INLINE void assignPacketByOuterInner(Index outer, Index inner) { + Index row = Base::rowIndexByOuterInner(outer, inner); + Index col = Base::colIndexByOuterInner(outer, inner); + assignPacket(row, col); + } + + template + EIGEN_STRONG_INLINE void assignPacketSegment(Index row, Index col, Index begin, Index count) { + PacketType tmp = m_src.template packetSegment(row, col, begin, count); + const_cast(m_src).template writePacketSegment( + row, col, m_dst.template packetSegment(row, col, begin, count), begin, count); + m_dst.template writePacketSegment(row, col, tmp, begin, count); + } + + template + EIGEN_STRONG_INLINE void assignPacketSegment(Index index, Index begin, Index count) { + PacketType tmp = m_src.template packetSegment(index, begin, count); + const_cast(m_src).template writePacketSegment( + index, m_dst.template packetSegment(index, begin, count), begin, count); + m_dst.template writePacketSegment(index, tmp, begin, count); + } + + // TODO find a simple way not to have to copy/paste this function from generic_dense_assignment_kernel, by simple I + // mean no CRTP (Gael) + template + EIGEN_STRONG_INLINE void assignPacketSegmentByOuterInner(Index outer, Index inner, Index begin, Index count) { + Index row = Base::rowIndexByOuterInner(outer, inner); + Index col = Base::colIndexByOuterInner(outer, inner); + assignPacketSegment(row, col, begin, count); + } +}; + +} // namespace internal + +} // end namespace Eigen + +#endif // EIGEN_SWAP_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Transpose.h b/o-voxel/third_party/eigen/Eigen/src/Core/Transpose.h new file mode 100644 index 0000000000000000000000000000000000000000..eacd861ad7a7a135b7bc813702fddbf38ddb0e6f --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Transpose.h @@ -0,0 +1,427 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2006-2008 Benoit Jacob +// Copyright (C) 2009-2014 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_TRANSPOSE_H +#define EIGEN_TRANSPOSE_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { +template +struct traits > : public traits { + typedef typename ref_selector::type MatrixTypeNested; + typedef std::remove_reference_t MatrixTypeNestedPlain; + enum { + RowsAtCompileTime = MatrixType::ColsAtCompileTime, + ColsAtCompileTime = MatrixType::RowsAtCompileTime, + MaxRowsAtCompileTime = MatrixType::MaxColsAtCompileTime, + MaxColsAtCompileTime = MatrixType::MaxRowsAtCompileTime, + FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, + Flags0 = traits::Flags & ~(LvalueBit | NestByRefBit), + Flags1 = Flags0 | FlagsLvalueBit, + Flags = Flags1 ^ RowMajorBit, + InnerStrideAtCompileTime = inner_stride_at_compile_time::ret, + OuterStrideAtCompileTime = outer_stride_at_compile_time::ret + }; +}; +} // namespace internal + +template +class TransposeImpl; + +/** \class Transpose + * \ingroup Core_Module + * + * \brief Expression of the transpose of a matrix + * + * \tparam MatrixType the type of the object of which we are taking the transpose + * + * This class represents an expression of the transpose of a matrix. + * It is the return type of MatrixBase::transpose() and MatrixBase::adjoint() + * and most of the time this is the only way it is used. 
+ * + * \sa MatrixBase::transpose(), MatrixBase::adjoint() + */ +template +class Transpose : public TransposeImpl::StorageKind> { + public: + typedef typename internal::ref_selector::non_const_type MatrixTypeNested; + + typedef typename TransposeImpl::StorageKind>::Base Base; + EIGEN_GENERIC_PUBLIC_INTERFACE(Transpose) + typedef internal::remove_all_t NestedExpression; + + EIGEN_DEVICE_FUNC explicit EIGEN_STRONG_INLINE Transpose(MatrixType& matrix) : m_matrix(matrix) {} + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(Transpose) + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index rows() const noexcept { return m_matrix.cols(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr Index cols() const noexcept { return m_matrix.rows(); } + + /** \returns the nested expression */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const internal::remove_all_t& nestedExpression() const { + return m_matrix; + } + + /** \returns the nested expression */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE std::remove_reference_t& nestedExpression() { + return m_matrix; + } + + /** \internal */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void resize(Index nrows, Index ncols) { m_matrix.resize(ncols, nrows); } + + protected: + typename internal::ref_selector::non_const_type m_matrix; +}; + +namespace internal { + +template ::ret> +struct TransposeImpl_base { + typedef typename dense_xpr_base >::type type; +}; + +template +struct TransposeImpl_base { + typedef typename dense_xpr_base >::type type; +}; + +} // end namespace internal + +// Generic API dispatcher +template +class TransposeImpl : public internal::generic_xpr_base >::type { + public: + typedef typename internal::generic_xpr_base >::type Base; +}; + +template +class TransposeImpl : public internal::TransposeImpl_base::type { + public: + typedef typename internal::TransposeImpl_base::type Base; + using Base::coeffRef; + EIGEN_DENSE_PUBLIC_INTERFACE(Transpose) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TransposeImpl) + + EIGEN_DEVICE_FUNC 
EIGEN_STRONG_INLINE Index innerStride() const { return derived().nestedExpression().innerStride(); } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Index outerStride() const { return derived().nestedExpression().outerStride(); } + + typedef std::conditional_t::value, Scalar, const Scalar> ScalarWithConstIfNotLvalue; + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr ScalarWithConstIfNotLvalue* data() { + return derived().nestedExpression().data(); + } + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE constexpr const Scalar* data() const { + return derived().nestedExpression().data(); + } + + // FIXME: shall we keep the const version of coeffRef? + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeffRef(Index rowId, Index colId) const { + return derived().nestedExpression().coeffRef(colId, rowId); + } + + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const Scalar& coeffRef(Index index) const { + return derived().nestedExpression().coeffRef(index); + } + + protected: + EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(TransposeImpl) +}; + +/** \returns an expression of the transpose of *this. + * + * Example: \include MatrixBase_transpose.cpp + * Output: \verbinclude MatrixBase_transpose.out + * + * \warning If you want to replace a matrix by its own transpose, do \b NOT do this: + * \code + * m = m.transpose(); // bug!!! caused by aliasing effect + * \endcode + * Instead, use the transposeInPlace() method: + * \code + * m.transposeInPlace(); + * \endcode + * which gives Eigen good opportunities for optimization, or alternatively you can also do: + * \code + * m = m.transpose().eval(); + * \endcode + * + * \sa transposeInPlace(), adjoint() */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE typename DenseBase::TransposeReturnType DenseBase::transpose() { + return TransposeReturnType(derived()); +} + +/** This is the const version of transpose(). + * + * Make sure you read the warning for transpose() ! 
+ * + * \sa transposeInPlace(), adjoint() */ +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE const typename DenseBase::ConstTransposeReturnType +DenseBase::transpose() const { + return ConstTransposeReturnType(derived()); +} + +/** \returns an expression of the adjoint (i.e. conjugate transpose) of *this. + * + * Example: \include MatrixBase_adjoint.cpp + * Output: \verbinclude MatrixBase_adjoint.out + * + * \warning If you want to replace a matrix by its own adjoint, do \b NOT do this: + * \code + * m = m.adjoint(); // bug!!! caused by aliasing effect + * \endcode + * Instead, use the adjointInPlace() method: + * \code + * m.adjointInPlace(); + * \endcode + * which gives Eigen good opportunities for optimization, or alternatively you can also do: + * \code + * m = m.adjoint().eval(); + * \endcode + * + * \sa adjointInPlace(), transpose(), conjugate(), class Transpose, class internal::scalar_conjugate_op */ +template +EIGEN_DEVICE_FUNC inline const typename MatrixBase::AdjointReturnType MatrixBase::adjoint() const { + return AdjointReturnType(this->transpose()); +} + +/*************************************************************************** + * "in place" transpose implementation + ***************************************************************************/ + +namespace internal { + +template ::size)) && + (internal::evaluator::Flags & PacketAccessBit)> +struct inplace_transpose_selector; + +template +struct inplace_transpose_selector { // square matrix + static void run(MatrixType& m) { + m.matrix().template triangularView().swap( + m.matrix().transpose().template triangularView()); + } +}; + +template +struct inplace_transpose_selector { // PacketSize x PacketSize + static void run(MatrixType& m) { + typedef typename MatrixType::Scalar Scalar; + typedef typename internal::packet_traits::type Packet; + const Index PacketSize = internal::packet_traits::size; + const Index Alignment = internal::evaluator::Alignment; + PacketBlock A; + for (Index i = 0; i < 
PacketSize; ++i) A.packet[i] = m.template packetByOuterInner(i, 0); + internal::ptranspose(A); + for (Index i = 0; i < PacketSize; ++i) + m.template writePacket(m.rowIndexByOuterInner(i, 0), m.colIndexByOuterInner(i, 0), A.packet[i]); + } +}; + +template +void BlockedInPlaceTranspose(MatrixType& m) { + typedef typename MatrixType::Scalar Scalar; + typedef typename internal::packet_traits::type Packet; + const Index PacketSize = internal::packet_traits::size; + eigen_assert(m.rows() == m.cols()); + int row_start = 0; + for (; row_start + PacketSize <= m.rows(); row_start += PacketSize) { + for (int col_start = row_start; col_start + PacketSize <= m.cols(); col_start += PacketSize) { + PacketBlock A; + if (row_start == col_start) { + for (Index i = 0; i < PacketSize; ++i) + A.packet[i] = m.template packetByOuterInner(row_start + i, col_start); + internal::ptranspose(A); + for (Index i = 0; i < PacketSize; ++i) + m.template writePacket(m.rowIndexByOuterInner(row_start + i, col_start), + m.colIndexByOuterInner(row_start + i, col_start), A.packet[i]); + } else { + PacketBlock B; + for (Index i = 0; i < PacketSize; ++i) { + A.packet[i] = m.template packetByOuterInner(row_start + i, col_start); + B.packet[i] = m.template packetByOuterInner(col_start + i, row_start); + } + internal::ptranspose(A); + internal::ptranspose(B); + for (Index i = 0; i < PacketSize; ++i) { + m.template writePacket(m.rowIndexByOuterInner(row_start + i, col_start), + m.colIndexByOuterInner(row_start + i, col_start), B.packet[i]); + m.template writePacket(m.rowIndexByOuterInner(col_start + i, row_start), + m.colIndexByOuterInner(col_start + i, row_start), A.packet[i]); + } + } + } + } + for (Index row = row_start; row < m.rows(); ++row) { + m.matrix().row(row).head(row).swap(m.matrix().col(row).head(row).transpose()); + } +} + +template +struct inplace_transpose_selector { // non square or dynamic matrix + static void run(MatrixType& m) { + typedef typename MatrixType::Scalar Scalar; + if (m.rows() 
== m.cols()) { + const Index PacketSize = internal::packet_traits::size; + if (!NumTraits::IsComplex && m.rows() >= PacketSize) { + if ((m.rows() % PacketSize) == 0) + BlockedInPlaceTranspose::Alignment>(m); + else + BlockedInPlaceTranspose(m); + } else { + m.matrix().template triangularView().swap( + m.matrix().transpose().template triangularView()); + } + } else { + m = m.transpose().eval(); + } + } +}; + +} // end namespace internal + +/** This is the "in place" version of transpose(): it replaces \c *this by its own transpose. + * Thus, doing + * \code + * m.transposeInPlace(); + * \endcode + * has the same effect on m as doing + * \code + * m = m.transpose().eval(); + * \endcode + * and is faster and also safer because in the latter line of code, forgetting the eval() results + * in a bug caused by \ref TopicAliasing "aliasing". + * + * Notice however that this method is only useful if you want to replace a matrix by its own transpose. + * If you just need the transpose of a matrix, use transpose(). + * + * \note if the matrix is not square, then \c *this must be a resizable matrix. + * This excludes (non-square) fixed-size matrices, block-expressions and maps. + * + * \sa transpose(), adjoint(), adjointInPlace() */ +template +EIGEN_DEVICE_FUNC inline void DenseBase::transposeInPlace() { + eigen_assert((rows() == cols() || (RowsAtCompileTime == Dynamic && ColsAtCompileTime == Dynamic)) && + "transposeInPlace() called on a non-square non-resizable matrix"); + internal::inplace_transpose_selector::run(derived()); +} + +/*************************************************************************** + * "in place" adjoint implementation + ***************************************************************************/ + +/** This is the "in place" version of adjoint(): it replaces \c *this by its own adjoint. 
+ * Thus, doing + * \code + * m.adjointInPlace(); + * \endcode + * has the same effect on m as doing + * \code + * m = m.adjoint().eval(); + * \endcode + * and is faster and also safer because in the latter line of code, forgetting the eval() results + * in a bug caused by aliasing. + * + * Notice however that this method is only useful if you want to replace a matrix by its own adjoint. + * If you just need the adjoint of a matrix, use adjoint(). + * + * \note if the matrix is not square, then \c *this must be a resizable matrix. + * This excludes (non-square) fixed-size matrices, block-expressions and maps. + * + * \sa transpose(), adjoint(), transposeInPlace() */ +template +EIGEN_DEVICE_FUNC inline void MatrixBase::adjointInPlace() { + derived() = adjoint().eval(); +} + +#ifndef EIGEN_NO_DEBUG + +// The following is to detect aliasing problems in most common cases. + +namespace internal { + +template +struct check_transpose_aliasing_compile_time_selector { + enum { ret = bool(blas_traits::IsTransposed) != DestIsTransposed }; +}; + +template +struct check_transpose_aliasing_compile_time_selector > { + enum { + ret = bool(blas_traits::IsTransposed) != DestIsTransposed || + bool(blas_traits::IsTransposed) != DestIsTransposed + }; +}; + +template +struct check_transpose_aliasing_run_time_selector { + EIGEN_DEVICE_FUNC static bool run(const Scalar* dest, const OtherDerived& src) { + return (bool(blas_traits::IsTransposed) != DestIsTransposed) && + (dest != 0 && dest == (const Scalar*)extract_data(src)); + } +}; + +template +struct check_transpose_aliasing_run_time_selector > { + EIGEN_DEVICE_FUNC static bool run(const Scalar* dest, const CwiseBinaryOp& src) { + return ((blas_traits::IsTransposed != DestIsTransposed) && + (dest != 0 && dest == (const Scalar*)extract_data(src.lhs()))) || + ((blas_traits::IsTransposed != DestIsTransposed) && + (dest != 0 && dest == (const Scalar*)extract_data(src.rhs()))); + } +}; + +// the following selector, 
checkTransposeAliasing_impl, based on MightHaveTransposeAliasing, +// is because when the condition controlling the assert is known at compile time, ICC emits a warning. +// This is actually a good warning: in expressions that don't have any transposing, the condition is +// known at compile time to be false, and using that, we can avoid generating the code of the assert again +// and again for all these expressions that don't need it. + +template ::IsTransposed, OtherDerived>::ret> +struct checkTransposeAliasing_impl { + EIGEN_DEVICE_FUNC static void run(const Derived& dst, const OtherDerived& other) { + eigen_assert( + (!check_transpose_aliasing_run_time_selector::IsTransposed, + OtherDerived>::run(extract_data(dst), other)) && + "aliasing detected during transposition, use transposeInPlace() " + "or evaluate the rhs into a temporary using .eval()"); + } +}; + +template +struct checkTransposeAliasing_impl { + EIGEN_DEVICE_FUNC static void run(const Derived&, const OtherDerived&) {} +}; + +template +EIGEN_DEVICE_FUNC inline void check_for_aliasing(const Dst& dst, const Src& src) { + if ((!Dst::IsVectorAtCompileTime) && dst.rows() > 1 && dst.cols() > 1) + internal::checkTransposeAliasing_impl::run(dst, src); +} + +} // end namespace internal + +#endif // EIGEN_NO_DEBUG + +} // end namespace Eigen + +#endif // EIGEN_TRANSPOSE_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/Transpositions.h b/o-voxel/third_party/eigen/Eigen/src/Core/Transpositions.h new file mode 100644 index 0000000000000000000000000000000000000000..164a8bfa504068fa831031f3fbb7c733d73c0299 --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/Transpositions.h @@ -0,0 +1,323 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2010-2011 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_TRANSPOSITIONS_H +#define EIGEN_TRANSPOSITIONS_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +template +class TranspositionsBase { + typedef internal::traits Traits; + + public: + typedef typename Traits::IndicesType IndicesType; + typedef typename IndicesType::Scalar StorageIndex; + typedef Eigen::Index Index; ///< \deprecated since Eigen 3.3 + + EIGEN_DEVICE_FUNC Derived& derived() { return *static_cast(this); } + EIGEN_DEVICE_FUNC const Derived& derived() const { return *static_cast(this); } + + /** Copies the \a other transpositions into \c *this */ + template + Derived& operator=(const TranspositionsBase& other) { + indices() = other.indices(); + return derived(); + } + + /** \returns the number of transpositions */ + EIGEN_DEVICE_FUNC Index size() const { return indices().size(); } + /** \returns the number of rows of the equivalent permutation matrix */ + EIGEN_DEVICE_FUNC Index rows() const { return indices().size(); } + /** \returns the number of columns of the equivalent permutation matrix */ + EIGEN_DEVICE_FUNC Index cols() const { return indices().size(); } + + /** Read-only access to the \a i-th entry of the underlying index vector */ + EIGEN_DEVICE_FUNC inline const StorageIndex& coeff(Index i) const { return indices().coeff(i); } + /** Writable access to the \a i-th entry of the underlying index vector */ + inline StorageIndex& coeffRef(Index i) { return indices().coeffRef(i); } + /** Read-only access to the \a i-th entry of the underlying index vector */ + inline const StorageIndex& operator()(Index i) const { return indices()(i); } + /** Writable access to the \a i-th entry of the underlying index vector */ + inline StorageIndex& operator()(Index i) { return indices()(i); } + /** Read-only access to the \a i-th entry of the underlying index vector */ + inline const StorageIndex& operator[](Index i) const { return indices()(i); } + /** Writable access to the \a i-th entry of the underlying index vector */ + inline 
StorageIndex& operator[](Index i) { return indices()(i); } + + /** const version of indices(). */ + EIGEN_DEVICE_FUNC const IndicesType& indices() const { return derived().indices(); } + /** \returns a reference to the stored array representing the transpositions. */ + EIGEN_DEVICE_FUNC IndicesType& indices() { return derived().indices(); } + + /** Resizes to given size. */ + inline void resize(Index newSize) { indices().resize(newSize); } + + /** Sets \c *this to represent an identity transformation, i.e. the \a i-th index is set to \a i */ + void setIdentity() { + for (StorageIndex i = 0; i < indices().size(); ++i) coeffRef(i) = i; + } + + // FIXME: do we want such methods ? + // might be useful when the target matrix expression is complex, e.g.: + // object.matrix().block(..,..,..,..) = trans * object.matrix().block(..,..,..,..); + /* + template + void applyForwardToRows(MatrixType& mat) const + { + for(Index k=0 ; k + void applyBackwardToRows(MatrixType& mat) const + { + for(Index k=size()-1 ; k>=0 ; --k) + if(m_indices(k)!=k) + mat.row(k).swap(mat.row(m_indices(k))); + } + */ + + /** \returns the inverse transformation */ + inline Transpose inverse() const { return Transpose(derived()); } + + /** \returns the transpose transformation (constructed the same way as inverse()) */ + inline Transpose transpose() const { return Transpose(derived()); } + + protected: +}; + +namespace internal { +template +struct traits > + : traits > { + typedef Matrix IndicesType; + typedef TranspositionsStorage StorageKind; +}; +} // namespace internal + +/** \class Transpositions + * \ingroup Core_Module + * + * \brief Represents a sequence of transpositions (row/column interchange) + * + * \tparam SizeAtCompileTime the number of transpositions, or Dynamic + * \tparam MaxSizeAtCompileTime the maximum number of transpositions, or Dynamic. This optional parameter defaults to + * SizeAtCompileTime. Most of the time, you should not have to specify it. 
+ * + * This class represents a permutation transformation as a sequence of \em n transpositions + * \f$[T_{n-1} \ldots T_{i} \ldots T_{0}]\f$. It is internally stored as a vector of integers \c indices. + * Each transposition \f$ T_{i} \f$ applied on the left of a matrix (\f$ T_{i} M\f$) interchanges + * the rows \c i and \c indices[i] of the matrix \c M. + * A transposition applied on the right (e.g., \f$ M T_{i}\f$) yields a column interchange. + * + * Compared to the class PermutationMatrix, such a sequence of transpositions is what is + * computed during a decomposition with pivoting, and it is faster when applying the permutation in-place. + * + * To apply a sequence of transpositions to a matrix, simply use the operator * as in the following example: + * \code + * Transpositions tr; + * MatrixXf mat; + * mat = tr * mat; + * \endcode + * In this example, we detect that the matrix appears on both sides, and so the transpositions + * are applied in-place without any temporary or extra copy. + * + * \sa class PermutationMatrix + */ + +template +class Transpositions + : public TranspositionsBase > { + typedef internal::traits Traits; + + public: + typedef TranspositionsBase Base; + typedef typename Traits::IndicesType IndicesType; + typedef typename IndicesType::Scalar StorageIndex; + + inline Transpositions() {} + + /** Copy constructor. */ + template + inline Transpositions(const TranspositionsBase& other) : m_indices(other.indices()) {} + + /** Generic constructor from expression of the transposition indices. */ + template + explicit inline Transpositions(const MatrixBase& indices) : m_indices(indices) {} + + /** Copies the \a other transpositions into \c *this */ + template + Transpositions& operator=(const TranspositionsBase& other) { + return Base::operator=(other); + } + + /** Constructs an uninitialized sequence of transpositions of given size. + */ + inline Transpositions(Index size) : m_indices(size) {} + + /** const version of indices(). 
*/ + EIGEN_DEVICE_FUNC const IndicesType& indices() const { return m_indices; } + /** \returns a reference to the stored array representing the transpositions. */ + EIGEN_DEVICE_FUNC IndicesType& indices() { return m_indices; } + + protected: + IndicesType m_indices; +}; + +namespace internal { +template +struct traits, PacketAccess_> > + : traits > { + typedef Map, PacketAccess_> IndicesType; + typedef StorageIndex_ StorageIndex; + typedef TranspositionsStorage StorageKind; +}; +} // namespace internal + +template +class Map, PacketAccess> + : public TranspositionsBase< + Map, PacketAccess> > { + typedef internal::traits Traits; + + public: + typedef TranspositionsBase Base; + typedef typename Traits::IndicesType IndicesType; + typedef typename IndicesType::Scalar StorageIndex; + + explicit inline Map(const StorageIndex* indicesPtr) : m_indices(indicesPtr) {} + + inline Map(const StorageIndex* indicesPtr, Index size) : m_indices(indicesPtr, size) {} + + /** Copies the \a other transpositions into \c *this */ + template + Map& operator=(const TranspositionsBase& other) { + return Base::operator=(other); + } + +#ifndef EIGEN_PARSED_BY_DOXYGEN + /** This is a special case of the templated operator=. Its purpose is to + * prevent a default operator= from hiding the templated operator=. + */ + Map& operator=(const Map& other) { + m_indices = other.m_indices; + return *this; + } +#endif + + /** const version of indices(). */ + EIGEN_DEVICE_FUNC const IndicesType& indices() const { return m_indices; } + + /** \returns a reference to the stored array representing the transpositions. 
*/ + EIGEN_DEVICE_FUNC IndicesType& indices() { return m_indices; } + + protected: + IndicesType m_indices; +}; + +namespace internal { +template +struct traits > : traits > { + typedef TranspositionsStorage StorageKind; +}; +} // namespace internal + +template +class TranspositionsWrapper : public TranspositionsBase > { + typedef internal::traits Traits; + + public: + typedef TranspositionsBase Base; + typedef typename Traits::IndicesType IndicesType; + typedef typename IndicesType::Scalar StorageIndex; + + explicit inline TranspositionsWrapper(IndicesType& indices) : m_indices(indices) {} + + /** Copies the \a other transpositions into \c *this */ + template + TranspositionsWrapper& operator=(const TranspositionsBase& other) { + return Base::operator=(other); + } + + /** const version of indices(). */ + EIGEN_DEVICE_FUNC const IndicesType& indices() const { return m_indices; } + + /** \returns a reference to the stored array representing the transpositions. */ + EIGEN_DEVICE_FUNC IndicesType& indices() { return m_indices; } + + protected: + typename IndicesType::Nested m_indices; +}; + +/** \returns the \a matrix with the \a transpositions applied to the columns. + */ +template +EIGEN_DEVICE_FUNC const Product operator*( + const MatrixBase& matrix, const TranspositionsBase& transpositions) { + return Product(matrix.derived(), transpositions.derived()); +} + +/** \returns the \a matrix with the \a transpositions applied to the rows. 
 + */ +template +EIGEN_DEVICE_FUNC const Product operator*( + const TranspositionsBase& transpositions, const MatrixBase& matrix) { + return Product(transpositions.derived(), matrix.derived()); +} + +// Template partial specialization for transposed/inverse transpositions. The expression stores only a const reference to the wrapped transpositions (m_transpositions), so that object must stay alive while the expression is used. + +namespace internal { + +template +struct traits > > : traits {}; + +} // end namespace internal + +template +class Transpose > { + typedef TranspositionsDerived TranspositionType; + typedef typename TranspositionType::IndicesType IndicesType; + + public: + explicit Transpose(const TranspositionType& t) : m_transpositions(t) {} + + EIGEN_DEVICE_FUNC constexpr Index size() const noexcept { return m_transpositions.size(); } + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_transpositions.size(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_transpositions.size(); } + + /** \returns the \a matrix with the inverse transpositions applied to the columns. + */ + template + friend const Product operator*(const MatrixBase& matrix, + const Transpose& trt) { + return Product(matrix.derived(), trt); + } + + /** \returns the \a matrix with the inverse transpositions applied to the rows. + */ + template + const Product operator*(const MatrixBase& matrix) const { + return Product(*this, matrix.derived()); + } + + EIGEN_DEVICE_FUNC const TranspositionType& nestedExpression() const { return m_transpositions; } + + protected: + const TranspositionType& m_transpositions; +}; + +} // end namespace Eigen + +#endif // EIGEN_TRANSPOSITIONS_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/TriangularMatrix.h b/o-voxel/third_party/eigen/Eigen/src/Core/TriangularMatrix.h new file mode 100644 index 0000000000000000000000000000000000000000..22da695ce15a0a972162b18410864d4521802a5f --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/TriangularMatrix.h @@ -0,0 +1,905 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. 
+// +// Copyright (C) 2008 Benoit Jacob +// Copyright (C) 2008-2009 Gael Guennebaud +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_TRIANGULARMATRIX_H +#define EIGEN_TRIANGULARMATRIX_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { + +template +struct triangular_solve_retval; + +} + +/** \class TriangularBase + * \ingroup Core_Module + * + * \brief Base class for triangular part in a matrix + */ +template +class TriangularBase : public EigenBase { + public: + enum { + Mode = internal::traits::Mode, + RowsAtCompileTime = internal::traits::RowsAtCompileTime, + ColsAtCompileTime = internal::traits::ColsAtCompileTime, + MaxRowsAtCompileTime = internal::traits::MaxRowsAtCompileTime, + MaxColsAtCompileTime = internal::traits::MaxColsAtCompileTime, + + SizeAtCompileTime = (internal::size_of_xpr_at_compile_time::ret), + /**< This is equal to the number of coefficients, i.e. the number of + * rows times the number of columns, or to \a Dynamic if this is not + * known at compile-time. 
\sa RowsAtCompileTime, ColsAtCompileTime */ + + MaxSizeAtCompileTime = internal::size_at_compile_time(internal::traits::MaxRowsAtCompileTime, + internal::traits::MaxColsAtCompileTime) + + }; + typedef typename internal::traits::Scalar Scalar; + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::StorageIndex StorageIndex; + typedef typename internal::traits::FullMatrixType DenseMatrixType; + typedef DenseMatrixType DenseType; + typedef Derived const& Nested; + + EIGEN_DEVICE_FUNC inline TriangularBase() { + eigen_assert(!((int(Mode) & int(UnitDiag)) && (int(Mode) & int(ZeroDiag)))); + } + + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return derived().rows(); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return derived().cols(); } + EIGEN_DEVICE_FUNC constexpr Index outerStride() const noexcept { return derived().outerStride(); } + EIGEN_DEVICE_FUNC constexpr Index innerStride() const noexcept { return derived().innerStride(); } + + // dummy resize function + EIGEN_DEVICE_FUNC void resize(Index rows, Index cols) { + EIGEN_UNUSED_VARIABLE(rows); + EIGEN_UNUSED_VARIABLE(cols); + eigen_assert(rows == this->rows() && cols == this->cols()); + } + + EIGEN_DEVICE_FUNC inline Scalar coeff(Index row, Index col) const { return derived().coeff(row, col); } + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { return derived().coeffRef(row, col); } + + /** \see MatrixBase::copyCoeff(row,col) + */ + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void copyCoeff(Index row, Index col, Other& other) { + derived().coeffRef(row, col) = other.coeff(row, col); + } + + EIGEN_DEVICE_FUNC inline Scalar operator()(Index row, Index col) const { + check_coordinates(row, col); + return coeff(row, col); + } + EIGEN_DEVICE_FUNC inline Scalar& operator()(Index row, Index col) { + check_coordinates(row, col); + return coeffRef(row, col); + } + +#ifdef EIGEN_MULTIDIMENSIONAL_SUBSCRIPT + EIGEN_DEVICE_FUNC 
inline Scalar operator[](Index row, Index col) const { return operator()(row, col); } + EIGEN_DEVICE_FUNC inline Scalar& operator[](Index row, Index col) { return operator()(row, col); } +#endif + +#ifndef EIGEN_PARSED_BY_DOXYGEN + EIGEN_DEVICE_FUNC inline const Derived& derived() const { return *static_cast(this); } + EIGEN_DEVICE_FUNC inline Derived& derived() { return *static_cast(this); } +#endif // not EIGEN_PARSED_BY_DOXYGEN + + template + EIGEN_DEVICE_FUNC void evalTo(MatrixBase& other) const; + template + EIGEN_DEVICE_FUNC void evalToLazy(MatrixBase& other) const; + + EIGEN_DEVICE_FUNC DenseMatrixType toDenseMatrix() const { + DenseMatrixType res(rows(), cols()); + evalToLazy(res); + return res; + } + + protected: + void check_coordinates(Index row, Index col) const { + EIGEN_ONLY_USED_FOR_DEBUG(row); + EIGEN_ONLY_USED_FOR_DEBUG(col); + eigen_assert(col >= 0 && col < cols() && row >= 0 && row < rows()); + const int mode = int(Mode) & ~SelfAdjoint; + EIGEN_ONLY_USED_FOR_DEBUG(mode); + eigen_assert((mode == Upper && col >= row) || (mode == Lower && col <= row) || + ((mode == StrictlyUpper || mode == UnitUpper) && col > row) || + ((mode == StrictlyLower || mode == UnitLower) && col < row)); + } + +#ifdef EIGEN_INTERNAL_DEBUGGING + void check_coordinates_internal(Index row, Index col) const { check_coordinates(row, col); } +#else + void check_coordinates_internal(Index, Index) const {} +#endif +}; + +/** \class TriangularView + * \ingroup Core_Module + * + * \brief Expression of a triangular part in a matrix + * + * \tparam MatrixType the type of the object in which we are taking the triangular part + * \tparam Mode the kind of triangular matrix expression to construct. Can be #Upper, + * #Lower, #UnitUpper, #UnitLower, #StrictlyUpper, or #StrictlyLower. + * This is in fact a bit field; it must have either #Upper or #Lower, + * and additionally it may have #UnitDiag or #ZeroDiag or neither. 
+ * + * This class represents a triangular part of a matrix, not necessarily square. Strictly speaking, for rectangular + * matrices one should speak of "trapezoid" parts. This class is the return type + * of MatrixBase::triangularView() and SparseMatrixBase::triangularView(), and most of the time this is the only way it + * is used. + * + * \sa MatrixBase::triangularView() + */ +namespace internal { +template +struct traits> : traits { + typedef typename ref_selector::non_const_type MatrixTypeNested; + typedef std::remove_reference_t MatrixTypeNestedNonRef; + typedef remove_all_t MatrixTypeNestedCleaned; + typedef typename MatrixType::PlainObject FullMatrixType; + typedef MatrixType ExpressionType; + enum { + Mode = Mode_, + FlagsLvalueBit = is_lvalue::value ? LvalueBit : 0, + Flags = (MatrixTypeNestedCleaned::Flags & (HereditaryBits | FlagsLvalueBit) & + (~(PacketAccessBit | DirectAccessBit | LinearAccessBit))) + }; +}; +} // namespace internal + +template +class TriangularViewImpl; + +template +class TriangularView + : public TriangularViewImpl::StorageKind> { + public: + typedef TriangularViewImpl::StorageKind> Base; + typedef typename internal::traits::Scalar Scalar; + typedef MatrixType_ MatrixType; + + protected: + typedef typename internal::traits::MatrixTypeNested MatrixTypeNested; + typedef typename internal::traits::MatrixTypeNestedNonRef MatrixTypeNestedNonRef; + + typedef internal::remove_all_t MatrixConjugateReturnType; + typedef TriangularView, Mode_> ConstTriangularView; + + public: + typedef typename internal::traits::StorageKind StorageKind; + typedef typename internal::traits::MatrixTypeNestedCleaned NestedExpression; + + enum { + Mode = Mode_, + Flags = internal::traits::Flags, + TransposeMode = (int(Mode) & int(Upper) ? Lower : 0) | (int(Mode) & int(Lower) ? 
Upper : 0) | + (int(Mode) & int(UnitDiag)) | (int(Mode) & int(ZeroDiag)), + IsVectorAtCompileTime = false + }; + + EIGEN_DEVICE_FUNC explicit inline TriangularView(MatrixType& matrix) : m_matrix(matrix) {} + + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(TriangularView) + + /** \copydoc EigenBase::rows() */ + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return m_matrix.rows(); } + /** \copydoc EigenBase::cols() */ + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return m_matrix.cols(); } + + /** \returns a const reference to the nested expression */ + EIGEN_DEVICE_FUNC const NestedExpression& nestedExpression() const { return m_matrix; } + + /** \returns a reference to the nested expression */ + EIGEN_DEVICE_FUNC NestedExpression& nestedExpression() { return m_matrix; } + + typedef TriangularView ConjugateReturnType; + /** \sa MatrixBase::conjugate() const */ + EIGEN_DEVICE_FUNC inline const ConjugateReturnType conjugate() const { + return ConjugateReturnType(m_matrix.conjugate()); + } + + /** \returns an expression of the complex conjugate of \c *this if Cond==true, + * returns \c *this otherwise. 
+ */ + template + EIGEN_DEVICE_FUNC inline std::conditional_t conjugateIf() const { + typedef std::conditional_t ReturnType; + return ReturnType(m_matrix.template conjugateIf()); + } + + typedef TriangularView AdjointReturnType; + /** \sa MatrixBase::adjoint() const */ + EIGEN_DEVICE_FUNC inline const AdjointReturnType adjoint() const { return AdjointReturnType(m_matrix.adjoint()); } + + typedef TriangularView TransposeReturnType; + /** \sa MatrixBase::transpose() */ + template + EIGEN_DEVICE_FUNC inline TransposeReturnType transpose( + std::enable_if_t::value, Dummy*> = nullptr) { + typename MatrixType::TransposeReturnType tmp(m_matrix); + return TransposeReturnType(tmp); + } + + typedef TriangularView ConstTransposeReturnType; + /** \sa MatrixBase::transpose() const */ + EIGEN_DEVICE_FUNC inline const ConstTransposeReturnType transpose() const { + return ConstTransposeReturnType(m_matrix.transpose()); + } + + template + EIGEN_DEVICE_FUNC inline const Solve solve(const MatrixBase& other) const { + return Solve(*this, other.derived()); + } + +// workaround MSVC ICE +#if EIGEN_COMP_MSVC + template + EIGEN_DEVICE_FUNC inline const internal::triangular_solve_retval solve( + const MatrixBase& other) const { + return Base::template solve(other); + } +#else + using Base::solve; +#endif + + /** \returns a selfadjoint view of the referenced triangular part which must be either \c #Upper or \c #Lower. 
+ * + * This is a shortcut for \code this->nestedExpression().selfadjointView<(*this)::Mode>() \endcode + * \sa MatrixBase::selfadjointView() */ + EIGEN_DEVICE_FUNC SelfAdjointView selfadjointView() { + EIGEN_STATIC_ASSERT((Mode & (UnitDiag | ZeroDiag)) == 0, PROGRAMMING_ERROR); + return SelfAdjointView(m_matrix); + } + + /** This is the const version of selfadjointView() */ + EIGEN_DEVICE_FUNC const SelfAdjointView selfadjointView() const { + EIGEN_STATIC_ASSERT((Mode & (UnitDiag | ZeroDiag)) == 0, PROGRAMMING_ERROR); + return SelfAdjointView(m_matrix); + } + + /** \returns the determinant of the triangular matrix + * \sa MatrixBase::determinant() */ + EIGEN_DEVICE_FUNC Scalar determinant() const { + if (Mode & UnitDiag) + return 1; + else if (Mode & ZeroDiag) + return 0; + else + return m_matrix.diagonal().prod(); + } + + protected: + MatrixTypeNested m_matrix; +}; + +/** \ingroup Core_Module + * + * \brief Base class for a triangular part in a \b dense matrix + * + * This class is an abstract base class of class TriangularView, and objects of type TriangularViewImpl cannot be + * instantiated. It extends class TriangularView with additional methods which available for dense expressions only. 
+ * + * \sa class TriangularView, MatrixBase::triangularView() + */ +template +class TriangularViewImpl : public TriangularBase> { + public: + typedef TriangularView TriangularViewType; + + typedef TriangularBase Base; + typedef typename internal::traits::Scalar Scalar; + + typedef MatrixType_ MatrixType; + typedef typename MatrixType::PlainObject DenseMatrixType; + typedef DenseMatrixType PlainObject; + + public: + using Base::derived; + using Base::evalToLazy; + + typedef typename internal::traits::StorageKind StorageKind; + + enum { Mode = Mode_, Flags = internal::traits::Flags }; + + /** \returns the outer-stride of the underlying dense matrix + * \sa DenseCoeffsBase::outerStride() */ + EIGEN_DEVICE_FUNC inline Index outerStride() const { return derived().nestedExpression().outerStride(); } + /** \returns the inner-stride of the underlying dense matrix + * \sa DenseCoeffsBase::innerStride() */ + EIGEN_DEVICE_FUNC inline Index innerStride() const { return derived().nestedExpression().innerStride(); } + + /** \sa MatrixBase::operator+=() */ + template + EIGEN_DEVICE_FUNC TriangularViewType& operator+=(const DenseBase& other) { + internal::call_assignment_no_alias(derived(), other.derived(), + internal::add_assign_op()); + return derived(); + } + /** \sa MatrixBase::operator-=() */ + template + EIGEN_DEVICE_FUNC TriangularViewType& operator-=(const DenseBase& other) { + internal::call_assignment_no_alias(derived(), other.derived(), + internal::sub_assign_op()); + return derived(); + } + + /** \sa MatrixBase::operator*=() */ + EIGEN_DEVICE_FUNC TriangularViewType& operator*=(const typename internal::traits::Scalar& other) { + return *this = derived().nestedExpression() * other; + } + /** \sa DenseBase::operator/=() */ + EIGEN_DEVICE_FUNC TriangularViewType& operator/=(const typename internal::traits::Scalar& other) { + return *this = derived().nestedExpression() / other; + } + + /** \sa MatrixBase::fill() */ + EIGEN_DEVICE_FUNC void fill(const Scalar& value) { 
setConstant(value); } + /** \sa MatrixBase::setConstant() */ + EIGEN_DEVICE_FUNC TriangularViewType& setConstant(const Scalar& value) { + return *this = MatrixType::Constant(derived().rows(), derived().cols(), value); + } + /** \sa MatrixBase::setZero() */ + EIGEN_DEVICE_FUNC TriangularViewType& setZero() { return setConstant(Scalar(0)); } + /** \sa MatrixBase::setOnes() */ + EIGEN_DEVICE_FUNC TriangularViewType& setOnes() { return setConstant(Scalar(1)); } + + /** \sa MatrixBase::coeff() + * \warning the coordinates must fit into the referenced triangular part + */ + EIGEN_DEVICE_FUNC inline Scalar coeff(Index row, Index col) const { + Base::check_coordinates_internal(row, col); + return derived().nestedExpression().coeff(row, col); + } + + /** \sa MatrixBase::coeffRef() + * \warning the coordinates must fit into the referenced triangular part + */ + EIGEN_DEVICE_FUNC inline Scalar& coeffRef(Index row, Index col) { + EIGEN_STATIC_ASSERT_LVALUE(TriangularViewType); + Base::check_coordinates_internal(row, col); + return derived().nestedExpression().coeffRef(row, col); + } + + /** Assigns a triangular matrix to a triangular part of a dense matrix */ + template + EIGEN_DEVICE_FUNC TriangularViewType& operator=(const TriangularBase& other); + + /** Shortcut for\code *this = other.other.triangularView<(*this)::Mode>() \endcode */ + template + EIGEN_DEVICE_FUNC TriangularViewType& operator=(const MatrixBase& other); + +#ifndef EIGEN_PARSED_BY_DOXYGEN + EIGEN_DEVICE_FUNC TriangularViewType& operator=(const TriangularViewImpl& other) { + return *this = other.derived().nestedExpression(); + } + + template + /** \deprecated */ + EIGEN_DEPRECATED EIGEN_DEVICE_FUNC void lazyAssign(const TriangularBase& other); + + template + /** \deprecated */ + EIGEN_DEPRECATED EIGEN_DEVICE_FUNC void lazyAssign(const MatrixBase& other); +#endif + + /** Efficient triangular matrix times vector/matrix product */ + template + EIGEN_DEVICE_FUNC const Product operator*( + const MatrixBase& rhs) 
const { + return Product(derived(), rhs.derived()); + } + + /** Efficient vector/matrix times triangular matrix product */ + template + friend EIGEN_DEVICE_FUNC const Product operator*( + const MatrixBase& lhs, const TriangularViewImpl& rhs) { + return Product(lhs.derived(), rhs.derived()); + } + + /** \returns the product of the inverse of \c *this with \a other, \a *this being triangular. + * + * This function computes the inverse-matrix matrix product inverse(\c *this) * \a other if + * \a Side==OnTheLeft (the default), or the right-inverse-multiply \a other * inverse(\c *this) if + * \a Side==OnTheRight. + * + * Note that the template parameter \c Side can be omitted, in which case \c Side==OnTheLeft + * + * The matrix \c *this must be triangular and invertible (i.e., all the coefficients of the + * diagonal must be non zero). It works as a forward (resp. backward) substitution if \c *this + * is an upper (resp. lower) triangular matrix. + * + * Example: \include Triangular_solve.cpp + * Output: \verbinclude Triangular_solve.out + * + * This function returns an expression of the inverse-multiply and can works in-place if it is assigned + * to the same matrix or vector \a other. + * + * For users coming from BLAS, this function (and more specifically solveInPlace()) offer + * all the operations supported by the \c *TRSV and \c *TRSM BLAS routines. + * + * \sa TriangularView::solveInPlace() + */ + template + inline const internal::triangular_solve_retval solve( + const MatrixBase& other) const; + + /** "in-place" version of TriangularView::solve() where the result is written in \a other + * + * \warning The parameter is only marked 'const' to make the C++ compiler accept a temporary expression here. + * This function will const_cast it, so constness isn't honored here. + * + * Note that the template parameter \c Side can be omitted, in which case \c Side==OnTheLeft + * + * See TriangularView:solve() for the details. 
+ */ + template + EIGEN_DEVICE_FUNC void solveInPlace(const MatrixBase& other) const; + + template + EIGEN_DEVICE_FUNC void solveInPlace(const MatrixBase& other) const { + return solveInPlace(other); + } + + /** Swaps the coefficients of the common triangular parts of two matrices */ + template + EIGEN_DEVICE_FUNC +#ifdef EIGEN_PARSED_BY_DOXYGEN + void + swap(TriangularBase& other) +#else + void + swap(TriangularBase const& other) +#endif + { + EIGEN_STATIC_ASSERT_LVALUE(OtherDerived); + call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op()); + } + + /** Shortcut for \code (*this).swap(other.triangularView<(*this)::Mode>()) \endcode */ + template + /** \deprecated */ + EIGEN_DEPRECATED EIGEN_DEVICE_FUNC void swap(MatrixBase const& other) { + EIGEN_STATIC_ASSERT_LVALUE(OtherDerived); + call_assignment(derived(), other.const_cast_derived(), internal::swap_assign_op()); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void _solve_impl(const RhsType& rhs, DstType& dst) const { + if (!internal::is_same_dense(dst, rhs)) dst = rhs; + this->solveInPlace(dst); + } + + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE TriangularViewType& _assignProduct(const ProductType& prod, const Scalar& alpha, + bool beta); + + protected: + EIGEN_DEFAULT_COPY_CONSTRUCTOR(TriangularViewImpl) + EIGEN_DEFAULT_EMPTY_CONSTRUCTOR_AND_DESTRUCTOR(TriangularViewImpl) +}; + +/*************************************************************************** + * Implementation of triangular evaluation/assignment + ***************************************************************************/ + +#ifndef EIGEN_PARSED_BY_DOXYGEN +// FIXME should we keep that possibility +template +template +EIGEN_DEVICE_FUNC inline TriangularView& TriangularViewImpl::operator=( + const MatrixBase& other) { + internal::call_assignment_no_alias(derived(), other.derived(), + internal::assign_op()); + return derived(); +} + +// FIXME should we keep that possibility +template +template 
+EIGEN_DEVICE_FUNC void TriangularViewImpl::lazyAssign(const MatrixBase& other) { + internal::call_assignment_no_alias(derived(), other.template triangularView()); +} + +template +template +EIGEN_DEVICE_FUNC inline TriangularView& TriangularViewImpl::operator=( + const TriangularBase& other) { + eigen_assert(Mode == int(OtherDerived::Mode)); + internal::call_assignment(derived(), other.derived()); + return derived(); +} + +template +template +EIGEN_DEVICE_FUNC void TriangularViewImpl::lazyAssign( + const TriangularBase& other) { + eigen_assert(Mode == int(OtherDerived::Mode)); + internal::call_assignment_no_alias(derived(), other.derived()); +} +#endif + +/*************************************************************************** + * Implementation of TriangularBase methods + ***************************************************************************/ + +/** Assigns a triangular or selfadjoint matrix to a dense matrix. + * If the matrix is triangular, the opposite part is set to zero. */ +template +template +EIGEN_DEVICE_FUNC void TriangularBase::evalTo(MatrixBase& other) const { + evalToLazy(other.derived()); +} + +/*************************************************************************** + * Implementation of TriangularView methods + ***************************************************************************/ + +/*************************************************************************** + * Implementation of MatrixBase methods + ***************************************************************************/ + +/** + * \returns an expression of a triangular view extracted from the current matrix + * + * The parameter \a Mode can have the following values: \c #Upper, \c #StrictlyUpper, \c #UnitUpper, + * \c #Lower, \c #StrictlyLower, \c #UnitLower. 
+ * + * Example: \include MatrixBase_triangularView.cpp + * Output: \verbinclude MatrixBase_triangularView.out + * + * \sa class TriangularView + */ +template +template +EIGEN_DEVICE_FUNC typename MatrixBase::template TriangularViewReturnType::Type +MatrixBase::triangularView() { + return typename TriangularViewReturnType::Type(derived()); +} + +/** This is the const version of MatrixBase::triangularView() */ +template +template +EIGEN_DEVICE_FUNC typename MatrixBase::template ConstTriangularViewReturnType::Type +MatrixBase::triangularView() const { + return typename ConstTriangularViewReturnType::Type(derived()); +} + +/** \returns true if *this is approximately equal to an upper triangular matrix, + * within the precision given by \a prec. + * + * \sa isLowerTriangular() + */ +template +bool MatrixBase::isUpperTriangular(const RealScalar& prec) const { + RealScalar maxAbsOnUpperPart = static_cast(-1); + for (Index j = 0; j < cols(); ++j) { + Index maxi = numext::mini(j, rows() - 1); + for (Index i = 0; i <= maxi; ++i) { + RealScalar absValue = numext::abs(coeff(i, j)); + if (absValue > maxAbsOnUpperPart) maxAbsOnUpperPart = absValue; + } + } + RealScalar threshold = maxAbsOnUpperPart * prec; + for (Index j = 0; j < cols(); ++j) + for (Index i = j + 1; i < rows(); ++i) + if (numext::abs(coeff(i, j)) > threshold) return false; + return true; +} + +/** \returns true if *this is approximately equal to a lower triangular matrix, + * within the precision given by \a prec. 
+ * + * \sa isUpperTriangular() + */ +template +bool MatrixBase::isLowerTriangular(const RealScalar& prec) const { + RealScalar maxAbsOnLowerPart = static_cast(-1); + for (Index j = 0; j < cols(); ++j) + for (Index i = j; i < rows(); ++i) { + RealScalar absValue = numext::abs(coeff(i, j)); + if (absValue > maxAbsOnLowerPart) maxAbsOnLowerPart = absValue; + } + RealScalar threshold = maxAbsOnLowerPart * prec; + for (Index j = 1; j < cols(); ++j) { + Index maxi = numext::mini(j, rows() - 1); + for (Index i = 0; i < maxi; ++i) + if (numext::abs(coeff(i, j)) > threshold) return false; + } + return true; +} + +/*************************************************************************** +**************************************************************************** +* Evaluators and Assignment of triangular expressions +*************************************************************************** +***************************************************************************/ + +namespace internal { + +// TODO currently a triangular expression has the form TriangularView<.,.> +// in the future triangular-ness should be defined by the expression traits +// such that Transpose > is valid. (currently TriangularBase::transpose() is overloaded to make +// it work) +template +struct evaluator_traits> { + typedef typename storage_kind_to_evaluator_kind::Kind Kind; + typedef typename glue_shapes::Shape, TriangularShape>::type Shape; +}; + +template +struct unary_evaluator, IndexBased> : evaluator> { + typedef TriangularView XprType; + typedef evaluator> Base; + EIGEN_DEVICE_FUNC unary_evaluator(const XprType& xpr) : Base(xpr.nestedExpression()) {} +}; + +// Additional assignment kinds: +struct Triangular2Triangular {}; +struct Triangular2Dense {}; +struct Dense2Triangular {}; + +template +struct triangular_assignment_loop; + +/** \internal Specialization of the dense assignment kernel for triangular matrices. 
+ * The main difference is that the triangular, diagonal, and opposite parts are processed through three different + * functions. \tparam UpLo must be either Lower or Upper \tparam Mode must be either 0, UnitDiag, ZeroDiag, or + * SelfAdjoint + */ +template +class triangular_dense_assignment_kernel + : public generic_dense_assignment_kernel { + protected: + typedef generic_dense_assignment_kernel Base; + typedef typename Base::DstXprType DstXprType; + typedef typename Base::SrcXprType SrcXprType; + using Base::m_dst; + using Base::m_functor; + using Base::m_src; + + public: + typedef typename Base::DstEvaluatorType DstEvaluatorType; + typedef typename Base::SrcEvaluatorType SrcEvaluatorType; + typedef typename Base::Scalar Scalar; + typedef typename Base::AssignmentTraits AssignmentTraits; + + EIGEN_DEVICE_FUNC triangular_dense_assignment_kernel(DstEvaluatorType& dst, const SrcEvaluatorType& src, + const Functor& func, DstXprType& dstExpr) + : Base(dst, src, func, dstExpr) {} + +#ifdef EIGEN_INTERNAL_DEBUGGING + EIGEN_DEVICE_FUNC void assignCoeff(Index row, Index col) { + eigen_internal_assert(row != col); + Base::assignCoeff(row, col); + } +#else + using Base::assignCoeff; +#endif + + EIGEN_DEVICE_FUNC void assignDiagonalCoeff(Index id) { + if (Mode == UnitDiag && SetOpposite) + m_functor.assignCoeff(m_dst.coeffRef(id, id), Scalar(1)); + else if (Mode == ZeroDiag && SetOpposite) + m_functor.assignCoeff(m_dst.coeffRef(id, id), Scalar(0)); + else if (Mode == 0) + Base::assignCoeff(id, id); + } + + EIGEN_DEVICE_FUNC void assignOppositeCoeff(Index row, Index col) { + eigen_internal_assert(row != col); + if (SetOpposite) m_functor.assignCoeff(m_dst.coeffRef(row, col), Scalar(0)); + } +}; + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src, + const Functor& func) { + typedef evaluator DstEvaluatorType; + typedef evaluator SrcEvaluatorType; + + SrcEvaluatorType srcEvaluator(src); + + Index dstRows 
= src.rows(); + Index dstCols = src.cols(); + if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols); + DstEvaluatorType dstEvaluator(dst); + + typedef triangular_dense_assignment_kernel + Kernel; + Kernel kernel(dstEvaluator, srcEvaluator, func, dst.const_cast_derived()); + + enum { + unroll = DstXprType::SizeAtCompileTime != Dynamic && SrcEvaluatorType::CoeffReadCost < HugeCost && + DstXprType::SizeAtCompileTime * + (int(DstEvaluatorType::CoeffReadCost) + int(SrcEvaluatorType::CoeffReadCost)) / 2 <= + EIGEN_UNROLLING_LIMIT + }; + + triangular_assignment_loop::run( + kernel); +} + +template +EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE void call_triangular_assignment_loop(DstXprType& dst, const SrcXprType& src) { + call_triangular_assignment_loop( + dst, src, internal::assign_op()); +} + +template <> +struct AssignmentKind { + typedef Triangular2Triangular Kind; +}; +template <> +struct AssignmentKind { + typedef Triangular2Dense Kind; +}; +template <> +struct AssignmentKind { + typedef Dense2Triangular Kind; +}; + +template +struct Assignment { + EIGEN_DEVICE_FUNC static void run(DstXprType& dst, const SrcXprType& src, const Functor& func) { + eigen_assert(int(DstXprType::Mode) == int(SrcXprType::Mode)); + + call_triangular_assignment_loop(dst, src, func); + } +}; + +template +struct Assignment { + EIGEN_DEVICE_FUNC static void run(DstXprType& dst, const SrcXprType& src, const Functor& func) { + call_triangular_assignment_loop(dst, src, func); + } +}; + +template +struct Assignment { + EIGEN_DEVICE_FUNC static void run(DstXprType& dst, const SrcXprType& src, const Functor& func) { + call_triangular_assignment_loop(dst, src, func); + } +}; + +template +struct triangular_assignment_loop { + // FIXME: this is not very clean, perhaps this information should be provided by the kernel? 
+ typedef typename Kernel::DstEvaluatorType DstEvaluatorType; + typedef typename DstEvaluatorType::XprType DstXprType; + + enum { + col = (UnrollCount - 1) / DstXprType::RowsAtCompileTime, + row = (UnrollCount - 1) % DstXprType::RowsAtCompileTime + }; + + typedef typename Kernel::Scalar Scalar; + + EIGEN_DEVICE_FUNC static inline void run(Kernel& kernel) { + triangular_assignment_loop::run(kernel); + + if (row == col) + kernel.assignDiagonalCoeff(row); + else if (((Mode & Lower) && row > col) || ((Mode & Upper) && row < col)) + kernel.assignCoeff(row, col); + else if (SetOpposite) + kernel.assignOppositeCoeff(row, col); + } +}; + +// prevent buggy user code from causing an infinite recursion +template +struct triangular_assignment_loop { + EIGEN_DEVICE_FUNC static inline void run(Kernel&) {} +}; + +// TODO: experiment with a recursive assignment procedure splitting the current +// triangular part into one rectangular and two triangular parts. + +template +struct triangular_assignment_loop { + typedef typename Kernel::Scalar Scalar; + EIGEN_DEVICE_FUNC static inline void run(Kernel& kernel) { + for (Index j = 0; j < kernel.cols(); ++j) { + Index maxi = numext::mini(j, kernel.rows()); + Index i = 0; + if (((Mode & Lower) && SetOpposite) || (Mode & Upper)) { + for (; i < maxi; ++i) + if (Mode & Upper) + kernel.assignCoeff(i, j); + else + kernel.assignOppositeCoeff(i, j); + } else + i = maxi; + + if (i < kernel.rows()) // then i==j + kernel.assignDiagonalCoeff(i++); + + if (((Mode & Upper) && SetOpposite) || (Mode & Lower)) { + for (; i < kernel.rows(); ++i) + if (Mode & Lower) + kernel.assignCoeff(i, j); + else + kernel.assignOppositeCoeff(i, j); + } + } + } +}; + +} // end namespace internal + +/** Assigns a triangular or selfadjoint matrix to a dense matrix. + * If the matrix is triangular, the opposite part is set to zero. 
*/ +template +template +EIGEN_DEVICE_FUNC void TriangularBase::evalToLazy(MatrixBase& other) const { + other.derived().resize(this->rows(), this->cols()); + internal::call_triangular_assignment_loop( + other.derived(), derived().nestedExpression()); +} + +namespace internal { + +// Triangular = Product +template +struct Assignment, + internal::assign_op::Scalar>, Dense2Triangular> { + typedef Product SrcXprType; + static void run(DstXprType& dst, const SrcXprType& src, + const internal::assign_op&) { + Index dstRows = src.rows(); + Index dstCols = src.cols(); + if ((dst.rows() != dstRows) || (dst.cols() != dstCols)) dst.resize(dstRows, dstCols); + + dst._assignProduct(src, Scalar(1), false); + } +}; + +// Triangular += Product +template +struct Assignment, + internal::add_assign_op::Scalar>, + Dense2Triangular> { + typedef Product SrcXprType; + static void run(DstXprType& dst, const SrcXprType& src, + const internal::add_assign_op&) { + dst._assignProduct(src, Scalar(1), true); + } +}; + +// Triangular -= Product +template +struct Assignment, + internal::sub_assign_op::Scalar>, + Dense2Triangular> { + typedef Product SrcXprType; + static void run(DstXprType& dst, const SrcXprType& src, + const internal::sub_assign_op&) { + dst._assignProduct(src, Scalar(-1), true); + } +}; + +} // end namespace internal + +} // end namespace Eigen + +#endif // EIGEN_TRIANGULARMATRIX_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/VectorBlock.h b/o-voxel/third_party/eigen/Eigen/src/Core/VectorBlock.h new file mode 100644 index 0000000000000000000000000000000000000000..a3021ff512b10ce328920d8935056b9c7ac5004f --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/VectorBlock.h @@ -0,0 +1,83 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2010 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. 
If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_VECTORBLOCK_H +#define EIGEN_VECTORBLOCK_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +namespace internal { +template +struct traits > + : public traits::Flags & RowMajorBit ? 1 : Size, + traits::Flags & RowMajorBit ? Size : 1> > {}; +} // namespace internal + +/** \class VectorBlock + * \ingroup Core_Module + * + * \brief Expression of a fixed-size or dynamic-size sub-vector + * + * \tparam VectorType the type of the object in which we are taking a sub-vector + * \tparam Size size of the sub-vector we are taking at compile time (optional) + * + * This class represents an expression of either a fixed-size or dynamic-size sub-vector. + * It is the return type of DenseBase::segment(Index,Index) and DenseBase::segment(Index) and + * most of the time this is the only way it is used. + * + * However, if you want to directly manipulate sub-vector expressions, + * for instance if you want to write a function returning such an expression, you + * will need to use this class. + * + * Here is an example illustrating the dynamic case: + * \include class_VectorBlock.cpp + * Output: \verbinclude class_VectorBlock.out + * + * \note Even though this expression has dynamic size, in the case where \a VectorType + * has fixed size, this expression inherits a fixed maximal size which means that evaluating + * it does not cause a dynamic memory allocation. + * + * Here is an example illustrating the fixed-size case: + * \include class_FixedVectorBlock.cpp + * Output: \verbinclude class_FixedVectorBlock.out + * + * \sa class Block, DenseBase::segment(Index,Index,Index,Index), DenseBase::segment(Index,Index) + */ +template +class VectorBlock : public Block::Flags & RowMajorBit ? 1 : Size, + internal::traits::Flags & RowMajorBit ? Size : 1> { + typedef Block::Flags & RowMajorBit ? 
1 : Size, + internal::traits::Flags & RowMajorBit ? Size : 1> + Base; + enum { IsColVector = !(internal::traits::Flags & RowMajorBit) }; + + public: + EIGEN_DENSE_PUBLIC_INTERFACE(VectorBlock) + EIGEN_STATIC_ASSERT_VECTOR_ONLY(VectorBlock) + EIGEN_INHERIT_ASSIGNMENT_OPERATORS(VectorBlock) + + /** Dynamic-size constructor + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE VectorBlock(VectorType& vector, Index start, Index size) + : Base(vector, IsColVector ? start : 0, IsColVector ? 0 : start, IsColVector ? size : 1, IsColVector ? 1 : size) { + } + + /** Fixed-size constructor + */ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE VectorBlock(VectorType& vector, Index start) + : Base(vector, IsColVector ? start : 0, IsColVector ? 0 : start) {} +}; + +} // end namespace Eigen + +#endif // EIGEN_VECTORBLOCK_H diff --git a/o-voxel/third_party/eigen/Eigen/src/Core/VectorwiseOp.h b/o-voxel/third_party/eigen/Eigen/src/Core/VectorwiseOp.h new file mode 100644 index 0000000000000000000000000000000000000000..252820b134943a6b0fae5738166cabf55e672dcc --- /dev/null +++ b/o-voxel/third_party/eigen/Eigen/src/Core/VectorwiseOp.h @@ -0,0 +1,733 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2008-2019 Gael Guennebaud +// Copyright (C) 2006-2008 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +#ifndef EIGEN_PARTIAL_REDUX_H +#define EIGEN_PARTIAL_REDUX_H + +// IWYU pragma: private +#include "./InternalHeaderCheck.h" + +namespace Eigen { + +/** \class PartialReduxExpr + * \ingroup Core_Module + * + * \brief Generic expression of a partially reduxed matrix + * + * \tparam MatrixType the type of the matrix we are applying the redux operation + * \tparam MemberOp type of the member functor + * \tparam Direction indicates the direction of the redux (#Vertical or #Horizontal) + * + * This class represents an expression of a partial redux operator of a matrix. + * It is the return type of some VectorwiseOp functions, + * and most of the time this is the only way it is used. + * + * \sa class VectorwiseOp + */ + +template +class PartialReduxExpr; + +namespace internal { + +template +struct traits > : traits { + typedef typename MemberOp::result_type Scalar; + typedef typename traits::StorageKind StorageKind; + typedef typename traits::XprKind XprKind; + typedef typename MatrixType::Scalar InputScalar; + enum { + RowsAtCompileTime = Direction == Vertical ? 1 : MatrixType::RowsAtCompileTime, + ColsAtCompileTime = Direction == Horizontal ? 1 : MatrixType::ColsAtCompileTime, + MaxRowsAtCompileTime = Direction == Vertical ? 1 : MatrixType::MaxRowsAtCompileTime, + MaxColsAtCompileTime = Direction == Horizontal ? 1 : MatrixType::MaxColsAtCompileTime, + Flags = RowsAtCompileTime == 1 ? RowMajorBit : 0, + TraversalSize = Direction == Vertical ? 
MatrixType::RowsAtCompileTime : MatrixType::ColsAtCompileTime + }; +}; +} // namespace internal + +template +class PartialReduxExpr : public internal::dense_xpr_base >::type, + internal::no_assignment_operator { + public: + typedef typename internal::dense_xpr_base::type Base; + EIGEN_DENSE_PUBLIC_INTERFACE(PartialReduxExpr) + + EIGEN_DEVICE_FUNC explicit PartialReduxExpr(const MatrixType& mat, const MemberOp& func = MemberOp()) + : m_matrix(mat), m_functor(func) {} + + EIGEN_DEVICE_FUNC constexpr Index rows() const noexcept { return (Direction == Vertical ? 1 : m_matrix.rows()); } + EIGEN_DEVICE_FUNC constexpr Index cols() const noexcept { return (Direction == Horizontal ? 1 : m_matrix.cols()); } + + EIGEN_DEVICE_FUNC typename MatrixType::Nested nestedExpression() const { return m_matrix; } + + EIGEN_DEVICE_FUNC const MemberOp& functor() const { return m_functor; } + + protected: + typename MatrixType::Nested m_matrix; + const MemberOp m_functor; +}; + +template +struct partial_redux_dummy_func; + +#define EIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(MEMBER, COST, VECTORIZABLE, BINARYOP) \ + template \ + struct member_##MEMBER { \ + typedef ResultType result_type; \ + typedef BINARYOP BinaryOp; \ + template \ + struct Cost { \ + enum { value = COST }; \ + }; \ + enum { Vectorizable = VECTORIZABLE }; \ + template \ + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE ResultType operator()(const XprType& mat) const { \ + return mat.MEMBER(); \ + } \ + BinaryOp binaryFunc() const { return BinaryOp(); } \ + } + +#define EIGEN_MEMBER_FUNCTOR(MEMBER, COST) EIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(MEMBER, COST, 0, partial_redux_dummy_func) + +namespace internal { + +EIGEN_MEMBER_FUNCTOR(norm, (Size + 5) * NumTraits::MulCost + (Size - 1) * NumTraits::AddCost); +EIGEN_MEMBER_FUNCTOR(stableNorm, (Size + 5) * NumTraits::MulCost + (Size - 1) * NumTraits::AddCost); +EIGEN_MEMBER_FUNCTOR(blueNorm, (Size + 5) * NumTraits::MulCost + (Size - 1) * NumTraits::AddCost); +EIGEN_MEMBER_FUNCTOR(hypotNorm, (Size - 1) * 
functor_traits >::Cost); +EIGEN_MEMBER_FUNCTOR(all, (Size - 1) * NumTraits::AddCost); +EIGEN_MEMBER_FUNCTOR(any, (Size - 1) * NumTraits::AddCost); +EIGEN_MEMBER_FUNCTOR(count, (Size - 1) * NumTraits::AddCost); + +EIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(sum, (Size - 1) * NumTraits::AddCost, 1, internal::scalar_sum_op); +EIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(minCoeff, (Size - 1) * NumTraits::AddCost, 1, internal::scalar_min_op); +EIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(maxCoeff, (Size - 1) * NumTraits::AddCost, 1, internal::scalar_max_op); +EIGEN_MAKE_PARTIAL_REDUX_FUNCTOR(prod, (Size - 1) * NumTraits::MulCost, 1, internal::scalar_product_op); + +template +struct member_lpnorm { + typedef ResultType result_type; + enum { Vectorizable = 0 }; + template + struct Cost { + enum { value = (Size + 5) * NumTraits::MulCost + (Size - 1) * NumTraits::AddCost }; + }; + EIGEN_DEVICE_FUNC member_lpnorm() {} + template + EIGEN_DEVICE_FUNC inline ResultType operator()(const XprType& mat) const { + return mat.template lpNorm

(); + } +}; + +template +struct member_redux { + typedef BinaryOpT BinaryOp; + typedef typename result_of::type result_type; + + enum { Vectorizable = functor_traits::PacketAccess }; + template + struct Cost { + enum { value = (Size - 1) * functor_traits::Cost }; + }; + EIGEN_DEVICE_FUNC explicit member_redux(const BinaryOp func) : m_functor(func) {} + template + EIGEN_DEVICE_FUNC inline result_type operator()(const DenseBase& mat) const { + return mat.redux(m_functor); + } + const BinaryOp& binaryFunc() const { return m_functor; } + const BinaryOp m_functor; +}; + +template +struct scalar_replace_zero_with_one_op { + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Scalar operator()(const Scalar& x) const { + return numext::is_exactly_zero(x) ? Scalar(1) : x; + } + template + EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Packet packetOp(const Packet& x) const { + return pselect(pcmp_eq(x, pzero(x)), pset1(Scalar(1)), x); + } +}; +template +struct functor_traits> { + enum { Cost = 1, PacketAccess = packet_traits::HasCmp }; +}; + +} // namespace internal + +/** \class VectorwiseOp + * \ingroup Core_Module + * + * \brief Pseudo expression providing broadcasting and partial reduction operations + * + * \tparam ExpressionType the type of the object on which to do partial reductions + * \tparam Direction indicates whether to operate on columns (#Vertical) or rows (#Horizontal) + * + * This class represents a pseudo expression with broadcasting and partial reduction features. + * It is the return type of DenseBase::colwise() and DenseBase::rowwise() + * and most of the time this is the only way it is explicitly used. + * + * To understand the logic of rowwise/colwise expression, let's consider a generic case `A.colwise().foo()` + * where `foo` is any method of `VectorwiseOp`. 
This expression is equivalent to applying `foo()` to each + * column of `A` and then re-assemble the outputs in a matrix expression: + * \code [A.col(0).foo(), A.col(1).foo(), ..., A.col(A.cols()-1).foo()] \endcode + * + * Example: \include MatrixBase_colwise.cpp + * Output: \verbinclude MatrixBase_colwise.out + * + * The begin() and end() methods are obviously exceptions to the previous rule as they + * return STL-compatible begin/end iterators to the rows or columns of the nested expression. + * Typical use cases include for-range-loop and calls to STL algorithms: + * + * Example: \include MatrixBase_colwise_iterator_cxx11.cpp + * Output: \verbinclude MatrixBase_colwise_iterator_cxx11.out + * + * For a partial reduction on an empty input, some rules apply. + * For the sake of clarity, let's consider a vertical reduction: + * - If the number of columns is zero, then a 1x0 row-major vector expression is returned. + * - Otherwise, if the number of rows is zero, then + * - a row vector of zeros is returned for sum-like reductions (sum, squaredNorm, norm, etc.) + * - a row vector of ones is returned for a product reduction (e.g., MatrixXd(n,0).colwise().prod()) + * - an assert is triggered for all other reductions (minCoeff,maxCoeff,redux(bin_op)) + * + * \sa DenseBase::colwise(), DenseBase::rowwise(), class PartialReduxExpr + */ +template +class VectorwiseOp { + public: + typedef typename ExpressionType::Scalar Scalar; + typedef typename ExpressionType::RealScalar RealScalar; + typedef internal::remove_all_t ExpressionTypeCleaned; + + template