Upload 9 files
Browse files- chg_package/LICENSE +21 -0
- chg_package/README.md +150 -0
- chg_package/chg_algorithm/__init__.py +18 -0
- chg_package/chg_algorithm/core.py +350 -0
- chg_package/examples/basic_example.py +153 -0
- chg_package/pyproject.toml +69 -0
- chg_package/requirements.txt +2 -0
- chg_package/setup.py +59 -0
- chg_package/tests/test_chg.py +179 -0
chg_package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2024 CHG Algorithm Team
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
chg_package/README.md
ADDED
|
@@ -0,0 +1,150 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CHG Algorithm Package
|
| 2 |
+
|
| 3 |
+
[](https://python.org)
|
| 4 |
+
[](LICENSE)
|
| 5 |
+
|
| 6 |
+
CHG (Covariance-based Hilbert Geometry) Algorithm is a sophisticated implementation of Gaussian Process regression with enhanced multi-head attention mechanisms for improved covariance computation and uncertainty quantification.
|
| 7 |
+
|
| 8 |
+
## Features
|
| 9 |
+
|
| 10 |
+
- **Multi-head Attention**: Advanced attention mechanism for better feature representation
|
| 11 |
+
- **Enhanced Covariance**: Sophisticated covariance computation using neural network components
|
| 12 |
+
- **Uncertainty Quantification**: Built-in uncertainty estimation for predictions
|
| 13 |
+
- **Optimization Support**: Gradient-based parameter optimization
|
| 14 |
+
- **Robust Implementation**: Handles numerical instabilities gracefully
|
| 15 |
+
|
| 16 |
+
## Installation
|
| 17 |
+
|
| 18 |
+
### From Source
|
| 19 |
+
|
| 20 |
+
```bash
|
| 21 |
+
git clone <repository-url>
|
| 22 |
+
cd chg_package
|
| 23 |
+
pip install -e .
|
| 24 |
+
```
|
| 25 |
+
|
| 26 |
+
### Development Installation
|
| 27 |
+
|
| 28 |
+
```bash
|
| 29 |
+
pip install -e ".[dev]"
|
| 30 |
+
```
|
| 31 |
+
|
| 32 |
+
## Quick Start
|
| 33 |
+
|
| 34 |
+
### Basic Usage
|
| 35 |
+
|
| 36 |
+
```python
|
| 37 |
+
from chg_algorithm import CHG
|
| 38 |
+
import numpy as np
|
| 39 |
+
|
| 40 |
+
# Initialize model
|
| 41 |
+
model = CHG(input_dim=3, hidden_dim=24, num_heads=4)
|
| 42 |
+
|
| 43 |
+
# Generate sample data
|
| 44 |
+
X_train = np.random.randn(100, 3)
|
| 45 |
+
y_train = np.sum(X_train**2, axis=1) + 0.1 * np.random.randn(100)
|
| 46 |
+
X_test = np.random.randn(20, 3)
|
| 47 |
+
|
| 48 |
+
# Make predictions
|
| 49 |
+
pred_mean, pred_var = model.fit_predict(X_train, y_train, X_test)
|
| 50 |
+
|
| 51 |
+
print(f"Predictions: {pred_mean}")
|
| 52 |
+
print(f"Uncertainties: {np.sqrt(pred_var)}")
|
| 53 |
+
```
|
| 54 |
+
|
| 55 |
+
### Running Demo
|
| 56 |
+
|
| 57 |
+
```python
|
| 58 |
+
from chg_algorithm import run_chg_experiment
|
| 59 |
+
|
| 60 |
+
# Run complete demonstration
|
| 61 |
+
model, predictions, variances = run_chg_experiment()
|
| 62 |
+
```
|
| 63 |
+
|
| 64 |
+
### Using Optimizer
|
| 65 |
+
|
| 66 |
+
```python
|
| 67 |
+
from chg_algorithm import CHG, CHGOptimizer
|
| 68 |
+
|
| 69 |
+
# Initialize model and optimizer
|
| 70 |
+
model = CHG(input_dim=3, hidden_dim=24, num_heads=4)
|
| 71 |
+
optimizer = CHGOptimizer(model, learning_rate=0.001)
|
| 72 |
+
|
| 73 |
+
# Optimize model parameters
|
| 74 |
+
for epoch in range(10):
|
| 75 |
+
optimizer.step(X_train, y_train)
|
| 76 |
+
lml = model.log_marginal_likelihood(X_train, y_train)
|
| 77 |
+
print(f"Epoch {epoch}: Log Marginal Likelihood = {lml:.4f}")
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
## API Reference
|
| 81 |
+
|
| 82 |
+
### CHG Class
|
| 83 |
+
|
| 84 |
+
Main model class implementing the CHG algorithm.
|
| 85 |
+
|
| 86 |
+
**Parameters:**
|
| 87 |
+
- `input_dim` (int): Dimensionality of input features
|
| 88 |
+
- `hidden_dim` (int): Hidden dimension for feature transformation
|
| 89 |
+
- `num_heads` (int): Number of attention heads
|
| 90 |
+
|
| 91 |
+
**Key Methods:**
|
| 92 |
+
- `fit_predict(X_train, y_train, X_test, noise_var=1e-6)`: Fit model and predict
|
| 93 |
+
- `log_marginal_likelihood(X, y, noise_var=1e-6)`: Compute log marginal likelihood
|
| 94 |
+
- `get_covariance_matrix(X)`: Get covariance matrix for inputs
|
| 95 |
+
|
| 96 |
+
### CHGOptimizer Class
|
| 97 |
+
|
| 98 |
+
Optimizer for CHG model parameters.
|
| 99 |
+
|
| 100 |
+
**Parameters:**
|
| 101 |
+
- `model` (CHG): CHG model instance to optimize
|
| 102 |
+
- `learning_rate` (float): Learning rate for parameter updates
|
| 103 |
+
|
| 104 |
+
**Key Methods:**
|
| 105 |
+
- `step(X, y, noise_var=1e-6)`: Perform one optimization step
|
| 106 |
+
- `compute_gradients(X, y, noise_var=1e-6)`: Compute parameter gradients
|
| 107 |
+
|
| 108 |
+
## Algorithm Details
|
| 109 |
+
|
| 110 |
+
The CHG algorithm combines several advanced techniques:
|
| 111 |
+
|
| 112 |
+
1. **Multi-head Attention**: Uses multiple attention heads to capture different aspects of feature relationships
|
| 113 |
+
2. **Enhanced Covariance**: Computes covariance using feedforward networks and layer normalization
|
| 114 |
+
3. **Gaussian Process Framework**: Built on solid GP foundations for uncertainty quantification
|
| 115 |
+
4. **Numerical Stability**: Includes fallback methods for numerical edge cases
|
| 116 |
+
|
| 117 |
+
## Requirements
|
| 118 |
+
|
| 119 |
+
- Python >= 3.7
|
| 120 |
+
- NumPy >= 1.19.0
|
| 121 |
+
- typing-extensions >= 3.7.4
|
| 122 |
+
|
| 123 |
+
## License
|
| 124 |
+
|
| 125 |
+
This project is licensed under the MIT License - see the LICENSE file for details.
|
| 126 |
+
|
| 127 |
+
## Contributing
|
| 128 |
+
|
| 129 |
+
1. Fork the repository
|
| 130 |
+
2. Create your feature branch (`git checkout -b feature/amazing-feature`)
|
| 131 |
+
3. Commit your changes (`git commit -m 'Add some amazing feature'`)
|
| 132 |
+
4. Push to the branch (`git push origin feature/amazing-feature`)
|
| 133 |
+
5. Open a Pull Request
|
| 134 |
+
|
| 135 |
+
## Citation
|
| 136 |
+
|
| 137 |
+
If you use this software in your research, please cite:
|
| 138 |
+
|
| 139 |
+
```bibtex
|
| 140 |
+
@software{chg_algorithm,
|
| 141 |
+
title={CHG Algorithm: Covariance-based Hilbert Geometry for Gaussian Processes},
|
| 142 |
+
author={CHG Algorithm Team},
|
| 143 |
+
year={2024},
|
| 144 |
+
url={https://github.com/your-username/chg-algorithm}
|
| 145 |
+
}
|
| 146 |
+
```
|
| 147 |
+
|
| 148 |
+
## Contact
|
| 149 |
+
|
| 150 |
+
For questions and support, please contact: chg@example.com
|
chg_package/chg_algorithm/__init__.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
CHG Algorithm Package
|
| 3 |
+
|
| 4 |
+
A Python package implementing the CHG (Covariance-based Hilbert Geometry) algorithm
|
| 5 |
+
for Gaussian Process regression with enhanced multi-head attention mechanisms.
|
| 6 |
+
"""
|
| 7 |
+
|
| 8 |
+
from .core import CHG, CHGOptimizer, run_chg_experiment
|
| 9 |
+
|
| 10 |
+
__version__ = "1.0.0"
|
| 11 |
+
__author__ = "CHG Algorithm Team"
|
| 12 |
+
__email__ = "chg@example.com"
|
| 13 |
+
|
| 14 |
+
__all__ = [
|
| 15 |
+
'CHG',
|
| 16 |
+
'CHGOptimizer',
|
| 17 |
+
'run_chg_experiment'
|
| 18 |
+
]
|
chg_package/chg_algorithm/core.py
ADDED
|
@@ -0,0 +1,350 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
CHG (Covariance-based Hilbert Geometry) Algorithm Implementation
|
| 3 |
+
|
| 4 |
+
This module contains the core CHG algorithm implementation with multi-head attention
|
| 5 |
+
mechanism for Gaussian Process regression with enhanced covariance computation.
|
| 6 |
+
|
| 7 |
+
Author: CHG Algorithm Team
|
| 8 |
+
Version: 1.0.0
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import numpy as np
|
| 12 |
+
from typing import Tuple, Optional
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
class CHG:
    """
    CHG (Covariance-based Hilbert Geometry) model.

    A Gaussian-Process-style regressor whose covariance (kernel) matrix is
    produced by a multi-head attention mechanism followed by a small
    feedforward refinement, supporting uncertainty quantification and
    gradient-based parameter optimization.

    Parameters:
    -----------
    input_dim : int
        Dimensionality of input features
    hidden_dim : int
        Hidden dimension for feature transformation; must be divisible by
        ``num_heads`` so features can be split evenly across heads
    num_heads : int
        Number of attention heads

    Raises:
    -------
    ValueError
        If ``hidden_dim`` is not divisible by ``num_heads`` (the per-head
        reshape inside ``_compute_covariance`` would otherwise fail later
        with a cryptic ``ValueError`` from ``np.reshape``).
    """

    def __init__(self, input_dim: int, hidden_dim: int, num_heads: int):
        # Fail fast with a clear message rather than crashing later inside
        # _compute_covariance when reshaping to (n, num_heads, head_dim).
        if hidden_dim % num_heads != 0:
            raise ValueError(
                f"hidden_dim ({hidden_dim}) must be divisible by "
                f"num_heads ({num_heads})"
            )
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_heads = num_heads
        self.head_dim = hidden_dim // num_heads

        self._init_parameters()

    def _init_parameters(self):
        """Initialize model parameters with small random values (std 0.02)."""
        # Query / key / value projection matrices.
        self.W_q = np.random.normal(0, 0.02, (self.input_dim, self.hidden_dim))
        self.W_k = np.random.normal(0, 0.02, (self.input_dim, self.hidden_dim))
        self.W_v = np.random.normal(0, 0.02, (self.input_dim, self.hidden_dim))

        # Covariance feedforward network.
        # NOTE(review): only the leading [:head_dim] slices of W_ff1/b_ff1/W_ff2
        # are ever used in _compute_covariance — confirm whether the full
        # matrices are intended.
        self.W_ff1 = np.random.normal(0, 0.02, (self.hidden_dim, 2 * self.hidden_dim))
        self.b_ff1 = np.zeros((2 * self.hidden_dim,))
        self.W_ff2 = np.random.normal(0, 0.02, (2 * self.hidden_dim, 1))
        self.b_ff2 = np.zeros((1,))

        # Layer-normalization gain / bias.
        self.gamma = np.ones((self.hidden_dim,))
        self.beta = np.zeros((self.hidden_dim,))

        # Per-head fusion weights and a global output scale.
        self.W_heads = np.random.normal(0, 0.02, (self.num_heads, 1))
        self.scale = np.random.normal(1.0, 0.1, (1,))

    def _layer_norm(self, x: np.ndarray, gamma: np.ndarray, beta: np.ndarray,
                    eps: float = 1e-6) -> np.ndarray:
        """Apply layer normalization over the last axis."""
        mean = np.mean(x, axis=-1, keepdims=True)
        var = np.var(x, axis=-1, keepdims=True)
        return gamma * (x - mean) / np.sqrt(var + eps) + beta

    def _gelu(self, x: np.ndarray) -> np.ndarray:
        """GELU activation function (tanh approximation)."""
        return 0.5 * x * (1 + np.tanh(np.sqrt(2 / np.pi) * (x + 0.044715 * x**3)))

    def _compute_covariance(self, X1: np.ndarray, X2: np.ndarray) -> np.ndarray:
        """
        Compute enhanced covariance matrix using multi-head attention.

        Parameters:
        -----------
        X1 : np.ndarray
            First set of input points, shape (n1, input_dim)
        X2 : np.ndarray
            Second set of input points, shape (n2, input_dim)

        Returns:
        --------
        np.ndarray
            Covariance matrix of shape (n1, n2). When X1 and X2 are the same
            points, the result is symmetrized and jittered so that downstream
            Cholesky factorization is more likely to succeed.
        """
        n1, n2 = X1.shape[0], X2.shape[0]

        # Project inputs into query / key / value spaces.
        Q1 = X1 @ self.W_q
        K2 = X2 @ self.W_k
        V2 = X2 @ self.W_v

        # Split the hidden dimension evenly across heads.
        Q1_h = Q1.reshape(n1, self.num_heads, self.head_dim)
        K2_h = K2.reshape(n2, self.num_heads, self.head_dim)
        V2_h = V2.reshape(n2, self.num_heads, self.head_dim)

        head_outputs = []

        for h in range(self.num_heads):
            Q_h = Q1_h[:, h, :]
            K_h = K2_h[:, h, :]
            V_h = V2_h[:, h, :]  # NOTE(review): V_h is currently unused — confirm intent.

            # Scaled dot-product attention scores as the base similarity.
            attention_scores = Q_h @ K_h.T / np.sqrt(self.head_dim)

            # Refine each pairwise similarity with a small feedforward pass.
            enhanced_cov = np.zeros((n1, n2))

            for i in range(n1):
                for j in range(n2):
                    base_sim = attention_scores[i, j]

                    # Elementwise query/key feature interaction.
                    feature_int = Q_h[i] * K_h[j]

                    # Normalize with the per-head slice of gamma/beta.
                    norm_features = self._layer_norm(
                        feature_int.reshape(1, -1),
                        self.gamma[:self.head_dim],
                        self.beta[:self.head_dim]
                    ).flatten()

                    # Two-layer feedforward using the leading sub-slices of
                    # the FF weights (see NOTE in _init_parameters).
                    ff_hidden = norm_features @ self.W_ff1[:self.head_dim, :self.head_dim] + self.b_ff1[:self.head_dim]
                    ff_hidden = self._gelu(ff_hidden)
                    ff_out = ff_hidden @ self.W_ff2[:self.head_dim, :] + self.b_ff2

                    # Residual connection around the attention score.
                    enhanced_cov[i, j] = base_sim + ff_out[0]

            head_outputs.append(enhanced_cov)

        # Fuse the per-head covariances with learned weights.
        final_cov = np.zeros((n1, n2))
        for h, head_out in enumerate(head_outputs):
            final_cov += self.W_heads[h, 0] * head_out

        final_cov = self.scale[0] * final_cov

        # Symmetrize and jitter when computing a self-covariance (K(X, X)).
        if n1 == n2 and np.allclose(X1, X2):
            final_cov = 0.5 * (final_cov + final_cov.T)
            final_cov += 1e-6 * np.eye(n1)

        return final_cov

    def fit_predict(self, X_train: np.ndarray, y_train: np.ndarray,
                    X_test: np.ndarray, noise_var: float = 1e-6) -> Tuple[np.ndarray, np.ndarray]:
        """
        Fit the model and make predictions.

        Parameters:
        -----------
        X_train : np.ndarray
            Training input data, shape (n_train, input_dim)
        y_train : np.ndarray
            Training target values, shape (n_train,)
        X_test : np.ndarray
            Test input data, shape (n_test, input_dim)
        noise_var : float
            Observation noise variance added to the training kernel diagonal

        Returns:
        --------
        Tuple[np.ndarray, np.ndarray]
            Predictive mean and variance, each of shape (n_test,).
            Variances are clipped below at 1e-8.
        """
        # Kernel blocks needed for standard GP inference.
        K_train = self._compute_covariance(X_train, X_train)
        K_test_train = self._compute_covariance(X_test, X_train)
        K_test = self._compute_covariance(X_test, X_test)

        K_noisy = K_train + noise_var * np.eye(len(X_train))

        try:
            # Cholesky-based solve: alpha = K_noisy^{-1} y via two
            # triangular solves.
            L = np.linalg.cholesky(K_noisy)
            alpha = np.linalg.solve(L, y_train)
            alpha = np.linalg.solve(L.T, alpha)

            # Predictive mean: K_* K^{-1} y.
            mean_pred = K_test_train @ alpha

            # Predictive variance: diag(K_**) - diag(K_* K^{-1} K_*^T).
            v = np.linalg.solve(L, K_test_train.T)
            var_pred = np.diag(K_test) - np.sum(v**2, axis=0)

        except np.linalg.LinAlgError:
            # Fallback for non-positive-definite kernels: pseudo-inverse.
            K_inv = np.linalg.pinv(K_noisy)
            mean_pred = K_test_train @ K_inv @ y_train
            var_pred = np.diag(K_test - K_test_train @ K_inv @ K_test_train.T)

        # Guard against tiny negative variances from round-off.
        var_pred = np.maximum(var_pred, 1e-8)
        return mean_pred, var_pred

    def log_marginal_likelihood(self, X: np.ndarray, y: np.ndarray,
                                noise_var: float = 1e-6) -> float:
        """
        Compute log marginal likelihood for model selection.

        log p(y|X) = -0.5 y^T K^{-1} y - 0.5 log|K| - 0.5 n log(2*pi)
        where K = K(X, X) + noise_var * I.

        Parameters:
        -----------
        X : np.ndarray
            Input data
        y : np.ndarray
            Target values
        noise_var : float
            Observation noise variance

        Returns:
        --------
        float
            Log marginal likelihood
        """
        K = self._compute_covariance(X, X)
        K_noisy = K + noise_var * np.eye(len(X))

        try:
            L = np.linalg.cholesky(K_noisy)
            alpha = np.linalg.solve(L, y)  # alpha = L^{-1} y

            # BUG FIX: the data-fit term is -0.5 * y^T K^{-1} y
            # = -0.5 * ||L^{-1} y||^2 = -0.5 * alpha^T alpha.
            # The previous code computed -0.5 * y^T alpha = -0.5 y^T L^{-1} y,
            # which is not the GP data-fit term.
            data_fit = -0.5 * alpha @ alpha
            complexity = -np.sum(np.log(np.diag(L)))  # -0.5 log|K|
            normalization = -0.5 * len(y) * np.log(2 * np.pi)

            return float(data_fit + complexity + normalization)

        except np.linalg.LinAlgError:
            # Non-PD fallback using slogdet + pseudo-inverse; a large
            # penalty stands in for log|K| when the determinant sign is bad.
            sign, logdet = np.linalg.slogdet(K_noisy)
            K_inv = np.linalg.pinv(K_noisy)

            data_fit = -0.5 * y.T @ K_inv @ y
            complexity = -0.5 * logdet if sign > 0 else -1e6
            normalization = -0.5 * len(y) * np.log(2 * np.pi)

            return float(data_fit + complexity + normalization)

    def get_covariance_matrix(self, X: np.ndarray) -> np.ndarray:
        """Return the self-covariance matrix K(X, X) for the given inputs."""
        return self._compute_covariance(X, X)

    def update_parameters(self, gradient_dict: dict, learning_rate: float = 0.001):
        """Apply one gradient-descent step: p <- p - lr * grad for each entry."""
        for param_name, gradient in gradient_dict.items():
            # Silently skip unknown names so partial gradient dicts are safe.
            if hasattr(self, param_name):
                current_param = getattr(self, param_name)
                updated_param = current_param - learning_rate * gradient
                setattr(self, param_name, updated_param)
|
| 253 |
+
|
| 254 |
+
|
| 255 |
+
class CHGOptimizer:
    """
    Optimizer for CHG model parameters using central-difference numerical
    gradients of the negative log marginal likelihood.

    Parameters:
    -----------
    model : CHG
        CHG model instance to optimize
    learning_rate : float
        Learning rate for parameter updates
    """

    # Model attributes differentiated by compute_gradients.
    _PARAM_NAMES = ('W_q', 'W_k', 'W_v', 'W_ff1', 'W_ff2', 'W_heads', 'scale')

    def __init__(self, model: "CHG", learning_rate: float = 0.001):
        self.model = model
        self.lr = learning_rate

    def compute_gradients(self, X: np.ndarray, y: np.ndarray, noise_var: float = 1e-6):
        """
        Compute numerical (central-difference) gradients for all parameters.

        Improvements over the original implementation:
        - removed an unused full likelihood evaluation (``base_loss`` was
          computed and never read — one wasted O(n^2) kernel build);
        - each perturbed parameter is written back as an independent copy, so
          the model never holds a view aliasing the perturbation buffer;
        - the original parameter array is restored exactly afterwards (the
          +eps / -2*eps / +eps round trip is not bit-exact in floats).

        Returns:
        --------
        dict
            Mapping of parameter name to gradient array (same shape as the
            parameter).
        """
        gradients = {}
        eps = 1e-5

        for param_name in self._PARAM_NAMES:
            original = getattr(self.model, param_name)
            flat = original.flatten()  # flatten() copies; safe to perturb
            grad = np.zeros_like(flat)

            for i in range(flat.size):
                saved = flat[i]

                # Loss at +eps.
                flat[i] = saved + eps
                setattr(self.model, param_name, flat.reshape(original.shape).copy())
                loss_plus = -self.model.log_marginal_likelihood(X, y, noise_var)

                # Loss at -eps.
                flat[i] = saved - eps
                setattr(self.model, param_name, flat.reshape(original.shape).copy())
                loss_minus = -self.model.log_marginal_likelihood(X, y, noise_var)

                flat[i] = saved
                grad[i] = (loss_plus - loss_minus) / (2 * eps)

            # Restore the untouched original array exactly.
            setattr(self.model, param_name, original)
            gradients[param_name] = grad.reshape(original.shape)

        return gradients

    def step(self, X: np.ndarray, y: np.ndarray, noise_var: float = 1e-6):
        """Perform one optimization step (gradient computation + update)."""
        gradients = self.compute_gradients(X, y, noise_var)
        self.model.update_parameters(gradients, self.lr)
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
def run_chg_experiment():
    """
    Run a small end-to-end demonstration of the CHG model.

    Returns:
    --------
    Tuple
        The fitted model, its predictive means, and predictive variances.
    """
    # Build the demo model.
    model = CHG(input_dim=3, hidden_dim=24, num_heads=4)

    # Synthetic task: quadratic trend plus a sinusoidal component and noise.
    np.random.seed(42)
    X_train = np.random.randn(80, 3)
    y_train = np.sum(X_train**2, axis=1) + 0.3 * np.sin(2 * X_train[:, 0]) + 0.1 * np.random.randn(80)

    X_test = np.random.randn(25, 3)
    y_test = np.sum(X_test**2, axis=1) + 0.3 * np.sin(2 * X_test[:, 0])

    # Posterior prediction on the held-out points.
    pred_mean, pred_var = model.fit_predict(X_train, y_train, X_test)

    # Point-prediction accuracy.
    errors = pred_mean - y_test
    rmse = np.sqrt(np.mean(errors**2))
    mae = np.mean(np.abs(errors))

    # Fraction of test targets inside the central 95% predictive interval.
    pred_std = np.sqrt(pred_var)
    lower = pred_mean - 1.96 * pred_std
    upper = pred_mean + 1.96 * pred_std
    coverage = np.mean((y_test >= lower) & (y_test <= upper))

    print(f"CHG Performance:")
    print(f"RMSE: {rmse:.4f}")
    print(f"MAE: {mae:.4f}")
    print(f"Coverage: {coverage:.4f}")
    print(f"Log Marginal Likelihood: {model.log_marginal_likelihood(X_train, y_train):.4f}")

    return model, pred_mean, pred_var
|
chg_package/examples/basic_example.py
ADDED
|
@@ -0,0 +1,153 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Basic example demonstrating CHG algorithm usage
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import numpy as np
|
| 6 |
+
import matplotlib.pyplot as plt
|
| 7 |
+
from chg_algorithm import CHG, CHGOptimizer
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
def basic_regression_example():
    """Fit CHG to noisy samples of sin(x) and plot the posterior."""
    print("=== Basic CHG Regression Example ===")

    # Noisy observations of sin(x) drawn uniformly from [-3, 3].
    np.random.seed(42)
    X_train = np.random.uniform(-3, 3, (50, 1))
    y_train = np.sin(X_train.flatten()) + 0.1 * np.random.randn(50)

    # Dense evaluation grid extending slightly beyond the training range.
    X_test = np.linspace(-4, 4, 100).reshape(-1, 1)
    y_true = np.sin(X_test.flatten())

    # Fit the model and obtain posterior mean / standard deviation.
    model = CHG(input_dim=1, hidden_dim=16, num_heads=2)
    pred_mean, pred_var = model.fit_predict(X_train, y_train, X_test)
    pred_std = np.sqrt(pred_var)

    # Report fit quality against the noiseless truth.
    mse = np.mean((pred_mean - y_true)**2)
    print(f"Mean Squared Error: {mse:.4f}")
    print(f"Log Marginal Likelihood: {model.log_marginal_likelihood(X_train, y_train):.4f}")

    # Plot data, truth, prediction, and the ±2σ band.
    grid = X_test.flatten()
    plt.figure(figsize=(10, 6))
    plt.scatter(X_train.flatten(), y_train, alpha=0.6, label='Training Data')
    plt.plot(grid, y_true, 'r-', label='True Function')
    plt.plot(grid, pred_mean, 'b-', label='CHG Prediction')
    plt.fill_between(grid,
                     pred_mean - 2*pred_std,
                     pred_mean + 2*pred_std,
                     alpha=0.2, label='95% Confidence')
    plt.xlabel('Input')
    plt.ylabel('Output')
    plt.title('CHG Gaussian Process Regression')
    plt.legend()
    plt.grid(True, alpha=0.3)
    plt.show()
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def optimization_example():
    """
    Demonstrate gradient-based optimization of CHG parameters.

    Runs 20 optimizer epochs on a quadratic synthetic task, printing the
    log marginal likelihood every 5 epochs and plotting the full trace.
    """
    print("\n=== CHG Optimization Example ===")

    # Synthetic quadratic target with Gaussian noise.
    np.random.seed(123)
    X_train = np.random.randn(80, 2)
    y_train = np.sum(X_train**2, axis=1) + 0.5 * np.random.randn(80)

    # BUG FIX: hidden_dim must be divisible by num_heads. The original call
    # used hidden_dim=20, num_heads=3 (head_dim = 20 // 3 = 6), so the
    # per-head reshape in CHG._compute_covariance tried to reshape an
    # (n, 20) array to (n, 3, 6) and raised ValueError. 24 is divisible by 3.
    model = CHG(input_dim=2, hidden_dim=24, num_heads=3)
    optimizer = CHGOptimizer(model, learning_rate=0.01)

    # Track the objective across epochs.
    lml_history = []

    print("Optimizing CHG parameters...")
    for epoch in range(20):
        optimizer.step(X_train, y_train)
        lml = model.log_marginal_likelihood(X_train, y_train)
        lml_history.append(lml)

        if epoch % 5 == 0:
            print(f"Epoch {epoch:2d}: Log Marginal Likelihood = {lml:.4f}")

    # Plot the optimization trace.
    plt.figure(figsize=(8, 5))
    plt.plot(lml_history, 'b-', linewidth=2)
    plt.xlabel('Epoch')
    plt.ylabel('Log Marginal Likelihood')
    plt.title('CHG Optimization Progress')
    plt.grid(True, alpha=0.3)
    plt.show()

    print(f"Final Log Marginal Likelihood: {lml_history[-1]:.4f}")
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def uncertainty_quantification_example():
    """Demonstrate CHG uncertainty estimation on data containing outliers."""
    print("\n=== Uncertainty Quantification Example ===")

    # Cubic target sampled on [-2, 2].
    np.random.seed(456)
    X_train = np.random.uniform(-2, 2, (60, 1))
    y_clean = 0.5 * X_train.flatten()**3 - X_train.flatten()

    # Corrupt a few points with large-magnitude outliers.
    perturbation = 0.2 * np.random.randn(60)
    flagged = np.random.choice(60, 5, replace=False)
    perturbation[flagged] += np.random.choice([-2, 2], 5) * 2  # Add outliers

    y_train = y_clean + perturbation

    # Evaluate on a wider grid than the training support.
    X_test = np.linspace(-3, 3, 80).reshape(-1, 1)

    # Fit the model and extract predictive standard deviations.
    model = CHG(input_dim=1, hidden_dim=12, num_heads=2)
    pred_mean, pred_var = model.fit_predict(X_train, y_train, X_test)
    pred_std = np.sqrt(pred_var)

    # Summarize where the model is least certain (top quartile of sigma).
    hi_mask = pred_std > np.percentile(pred_std, 75)
    print(f"Percentage of high-uncertainty predictions: {np.mean(hi_mask)*100:.1f}%")
    print(f"Average prediction uncertainty: {np.mean(pred_std):.4f}")
    print(f"Maximum prediction uncertainty: {np.max(pred_std):.4f}")

    # Left panel: predictions with the 95% band; right panel: sigma itself.
    grid = X_test.flatten()
    plt.figure(figsize=(12, 5))

    plt.subplot(1, 2, 1)
    plt.scatter(X_train.flatten(), y_train, alpha=0.7, c='red', label='Training Data (with outliers)')
    plt.plot(grid, pred_mean, 'b-', linewidth=2, label='CHG Prediction')
    plt.fill_between(grid,
                     pred_mean - 2*pred_std,
                     pred_mean + 2*pred_std,
                     alpha=0.3, label='95% Confidence')
    plt.xlabel('Input')
    plt.ylabel('Output')
    plt.title('CHG Predictions with Uncertainty')
    plt.legend()
    plt.grid(True, alpha=0.3)

    plt.subplot(1, 2, 2)
    plt.plot(grid, pred_std, 'g-', linewidth=2)
    plt.fill_between(grid[hi_mask],
                     0, pred_std[hi_mask],
                     alpha=0.4, color='orange',
                     label='High Uncertainty Regions')
    plt.xlabel('Input')
    plt.ylabel('Prediction Uncertainty (σ)')
    plt.title('Uncertainty Estimation')
    plt.legend()
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
if __name__ == "__main__":
|
| 148 |
+
# Run all examples
|
| 149 |
+
basic_regression_example()
|
| 150 |
+
optimization_example()
|
| 151 |
+
uncertainty_quantification_example()
|
| 152 |
+
|
| 153 |
+
print("\n=== All Examples Completed ===")
|
chg_package/pyproject.toml
ADDED
|
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[build-system]
|
| 2 |
+
requires = ["setuptools>=61.0", "wheel"]
|
| 3 |
+
build-backend = "setuptools.build_meta"
|
| 4 |
+
|
| 5 |
+
[project]
|
| 6 |
+
name = "chg-algorithm"
|
| 7 |
+
description = "CHG (Covariance-based Hilbert Geometry) Algorithm for Gaussian Process Regression"
|
| 8 |
+
readme = "README.md"
|
| 9 |
+
requires-python = ">=3.7"
|
| 10 |
+
license = {text = "MIT"}
|
| 11 |
+
authors = [
|
| 12 |
+
{name = "CHG Algorithm Team", email = "chg@example.com"}
|
| 13 |
+
]
|
| 14 |
+
keywords = ["gaussian-process", "machine-learning", "regression", "uncertainty-quantification"]
|
| 15 |
+
classifiers = [
|
| 16 |
+
"Development Status :: 4 - Beta",
|
| 17 |
+
"Intended Audience :: Science/Research",
|
| 18 |
+
"License :: OSI Approved :: MIT License",
|
| 19 |
+
"Operating System :: OS Independent",
|
| 20 |
+
"Programming Language :: Python :: 3",
|
| 21 |
+
"Programming Language :: Python :: 3.7",
|
| 22 |
+
"Programming Language :: Python :: 3.8",
|
| 23 |
+
"Programming Language :: Python :: 3.9",
|
| 24 |
+
"Programming Language :: Python :: 3.10",
|
| 25 |
+
"Programming Language :: Python :: 3.11",
|
| 26 |
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
| 27 |
+
]
|
| 28 |
+
dependencies = [
|
| 29 |
+
"numpy>=1.19.0",
|
| 30 |
+
"typing-extensions>=3.7.4"
|
| 31 |
+
]
|
| 32 |
+
dynamic = ["version"]
|
| 33 |
+
|
| 34 |
+
[project.optional-dependencies]
|
| 35 |
+
dev = [
|
| 36 |
+
"pytest>=6.0",
|
| 37 |
+
"pytest-cov>=2.0",
|
| 38 |
+
"black>=21.0",
|
| 39 |
+
"flake8>=3.9",
|
| 40 |
+
"mypy>=0.910"
|
| 41 |
+
]
|
| 42 |
+
docs = [
|
| 43 |
+
"sphinx>=4.0",
|
| 44 |
+
"sphinx-rtd-theme>=1.0"
|
| 45 |
+
]
|
| 46 |
+
examples = [
|
| 47 |
+
"matplotlib>=3.0"
|
| 48 |
+
]
|
| 49 |
+
|
| 50 |
+
[project.scripts]
|
| 51 |
+
chg-demo = "chg_algorithm.core:run_chg_experiment"
|
| 52 |
+
|
| 53 |
+
[tool.setuptools.dynamic]
|
| 54 |
+
version = {attr = "chg_algorithm.__version__"}
|
| 55 |
+
|
| 56 |
+
[tool.black]
|
| 57 |
+
line-length = 88
|
| 58 |
+
target-version = ['py37', 'py38', 'py39', 'py310', 'py311']
|
| 59 |
+
|
| 60 |
+
[tool.mypy]
|
| 61 |
+
python_version = "3.7"
|
| 62 |
+
warn_return_any = true
|
| 63 |
+
warn_unused_configs = true
|
| 64 |
+
disallow_untyped_defs = true
|
| 65 |
+
|
| 66 |
+
[tool.pytest.ini_options]
|
| 67 |
+
testpaths = ["tests"]
|
| 68 |
+
python_files = ["test_*.py"]
|
| 69 |
+
addopts = "--verbose --cov=chg_algorithm --cov-report=html --cov-report=term"
|
chg_package/requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
numpy>=1.19.0
|
| 2 |
+
typing-extensions>=3.7.4
|
chg_package/setup.py
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
Setup script for CHG Algorithm Package.

Static metadata also lives in ``pyproject.toml``; this script keeps a
``setuptools``-based entry point for tooling that still invokes ``setup.py``
directly.  The version is read from ``chg_algorithm/__init__.py`` (the same
attribute ``pyproject.toml`` declares as dynamic) so it is defined in exactly
one place — previously it was hard-coded here as ``"1.0.0"``, which both
conflicted with the dynamic declaration and could drift from the package.
"""

import re
from pathlib import Path

from setuptools import setup, find_packages


def _read_version() -> str:
    """Return ``__version__`` from the package without importing it.

    Importing ``chg_algorithm`` at build time would require numpy to be
    installed in the build environment; a regex scan avoids that.
    Falls back to "1.0.0" if the attribute cannot be found.
    """
    init_path = Path("chg_algorithm") / "__init__.py"
    match = re.search(
        r'^__version__\s*=\s*["\']([^"\']+)["\']',
        init_path.read_text(encoding="utf-8"),
        re.MULTILINE,
    )
    return match.group(1) if match else "1.0.0"


# Long description for PyPI is the project README.
with open("README.md", "r", encoding="utf-8") as fh:
    long_description = fh.read()

# Runtime dependencies are kept in requirements.txt; skip blanks and comments.
with open("requirements.txt", "r", encoding="utf-8") as fh:
    requirements = [
        line.strip() for line in fh if line.strip() and not line.startswith("#")
    ]

setup(
    name="chg-algorithm",
    version=_read_version(),
    author="CHG Algorithm Team",
    author_email="chg@example.com",
    description="CHG (Covariance-based Hilbert Geometry) Algorithm for Gaussian Process Regression",
    long_description=long_description,
    long_description_content_type="text/markdown",
    url="https://github.com/your-username/chg-algorithm",
    packages=find_packages(),
    classifiers=[
        "Development Status :: 4 - Beta",
        "Intended Audience :: Science/Research",
        "License :: OSI Approved :: MIT License",
        "Operating System :: OS Independent",
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 3.7",
        "Programming Language :: Python :: 3.8",
        "Programming Language :: Python :: 3.9",
        "Programming Language :: Python :: 3.10",
        "Programming Language :: Python :: 3.11",
        "Topic :: Scientific/Engineering :: Artificial Intelligence",
        "Topic :: Scientific/Engineering :: Mathematics",
    ],
    python_requires=">=3.7",
    install_requires=requirements,
    extras_require={
        "dev": [
            "pytest>=6.0",
            "pytest-cov>=2.0",
            "black>=21.0",
            "flake8>=3.9",
            "mypy>=0.910",
        ],
        "docs": [
            "sphinx>=4.0",
            "sphinx-rtd-theme>=1.0",
        ],
    },
    entry_points={
        "console_scripts": [
            "chg-demo=chg_algorithm.core:run_chg_experiment",
        ],
    },
    include_package_data=True,
    zip_safe=False,
)
|
chg_package/tests/test_chg.py
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Unit tests for CHG algorithm
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import unittest
|
| 6 |
+
import numpy as np
|
| 7 |
+
from chg_algorithm import CHG, CHGOptimizer
|
| 8 |
+
|
| 9 |
+
|
| 10 |
+
class TestCHG(unittest.TestCase):
    """Unit tests for the core CHG model."""

    def setUp(self):
        """Set up test fixtures.

        The RNG is seeded so the random fixtures are reproducible; without
        a seed the suite can fail intermittently on unlucky draws.
        """
        np.random.seed(0)
        self.model = CHG(input_dim=2, hidden_dim=8, num_heads=2)
        self.X_train = np.random.randn(20, 2)
        self.y_train = np.random.randn(20)
        self.X_test = np.random.randn(10, 2)

    def test_model_initialization(self):
        """Test model initialization"""
        self.assertEqual(self.model.input_dim, 2)
        self.assertEqual(self.model.hidden_dim, 8)
        self.assertEqual(self.model.num_heads, 2)
        # head_dim is hidden_dim // num_heads.
        self.assertEqual(self.model.head_dim, 4)

    def test_parameter_shapes(self):
        """Test parameter matrix shapes"""
        self.assertEqual(self.model.W_q.shape, (2, 8))
        self.assertEqual(self.model.W_k.shape, (2, 8))
        self.assertEqual(self.model.W_v.shape, (2, 8))
        self.assertEqual(self.model.W_heads.shape, (2, 1))

    def test_covariance_computation(self):
        """Test covariance matrix computation"""
        K = self.model._compute_covariance(self.X_train, self.X_train)

        # Check shape
        self.assertEqual(K.shape, (20, 20))

        # Check symmetry (approximately)
        self.assertTrue(np.allclose(K, K.T, atol=1e-6))

        # Check positive semi-definiteness.  eigvalsh is used instead of
        # eigvals: eigvals on a nearly-symmetric matrix can return complex
        # values with tiny imaginary parts, which breaks the >= comparison.
        # eigvalsh of the symmetrized matrix is guaranteed real.
        eigenvals = np.linalg.eigvalsh((K + K.T) / 2)
        self.assertTrue(np.all(eigenvals >= -1e-6))

    def test_fit_predict(self):
        """Test fit and predict functionality"""
        pred_mean, pred_var = self.model.fit_predict(
            self.X_train, self.y_train, self.X_test
        )

        # Check output shapes
        self.assertEqual(pred_mean.shape, (10,))
        self.assertEqual(pred_var.shape, (10,))

        # Check variance is positive
        self.assertTrue(np.all(pred_var > 0))

    def test_log_marginal_likelihood(self):
        """Test log marginal likelihood computation"""
        lml = self.model.log_marginal_likelihood(self.X_train, self.y_train)

        # Should return a finite number
        self.assertTrue(np.isfinite(lml))
        self.assertIsInstance(lml, float)

    def test_layer_norm(self):
        """Test layer normalization"""
        x = np.random.randn(5, 8)
        gamma = np.ones(8)
        beta = np.zeros(8)

        normalized = self.model._layer_norm(x, gamma, beta)

        # Check shape preservation
        self.assertEqual(normalized.shape, x.shape)

        # Check normalization (mean ≈ 0, std ≈ 1) along the last axis
        mean = np.mean(normalized, axis=-1)
        std = np.std(normalized, axis=-1)
        self.assertTrue(np.allclose(mean, 0, atol=1e-6))
        self.assertTrue(np.allclose(std, 1, atol=1e-6))

    def test_gelu_activation(self):
        """Test GELU activation function"""
        x = np.array([-2, -1, 0, 1, 2])
        result = self.model._gelu(x)

        # Check shape preservation
        self.assertEqual(result.shape, x.shape)

        # Check monotonicity (GELU is increasing on [-2, 2])
        self.assertTrue(np.all(np.diff(result) > 0))

        # Check specific values
        self.assertAlmostEqual(result[2], 0.0, places=6)  # GELU(0) = 0
| 100 |
+
class TestCHGOptimizer(unittest.TestCase):
    """Unit tests for the gradient-based CHG optimizer."""

    def setUp(self):
        """Set up test fixtures.

        Seeding the RNG makes the fixtures — and therefore the computed
        gradients — reproducible.  In particular test_optimization_step
        asserts parameters changed, which could flake on unseeded data.
        """
        np.random.seed(1)
        self.model = CHG(input_dim=2, hidden_dim=6, num_heads=2)
        self.optimizer = CHGOptimizer(self.model, learning_rate=0.01)
        self.X = np.random.randn(15, 2)
        self.y = np.random.randn(15)

    def test_optimizer_initialization(self):
        """Test optimizer initialization"""
        self.assertEqual(self.optimizer.lr, 0.01)
        self.assertIs(self.optimizer.model, self.model)

    def test_gradient_computation(self):
        """Test gradient computation"""
        gradients = self.optimizer.compute_gradients(self.X, self.y)

        # Check that gradients are computed for all parameters
        expected_params = ['W_q', 'W_k', 'W_v', 'W_ff1', 'W_ff2', 'W_heads', 'scale']
        for param in expected_params:
            self.assertIn(param, gradients)

            # Check gradient shape matches parameter shape
            param_shape = getattr(self.model, param).shape
            self.assertEqual(gradients[param].shape, param_shape)

    def test_optimization_step(self):
        """Test optimization step"""
        # Store initial parameter values (copy: step() mutates in place)
        initial_params = {}
        for param in ['W_q', 'W_k', 'W_v', 'W_heads', 'scale']:
            initial_params[param] = getattr(self.model, param).copy()

        # Perform optimization step
        self.optimizer.step(self.X, self.y)

        # Check that parameters have been updated
        for param in initial_params:
            updated_param = getattr(self.model, param)
            self.assertFalse(np.allclose(initial_params[param], updated_param))
| 141 |
+
|
| 142 |
+
|
| 143 |
+
class TestExperiment(unittest.TestCase):
    """End-to-end test of the packaged experiment entry point."""

    def test_run_chg_experiment(self):
        """Test the complete experiment function"""
        import io
        from contextlib import redirect_stdout

        from chg_algorithm import run_chg_experiment

        # Capture print output with redirect_stdout: unlike manually
        # swapping sys.stdout (the previous approach, which also restored
        # it redundantly inside the try body), the context manager
        # guarantees stdout is restored on every exit path.
        captured_output = io.StringIO()
        with redirect_stdout(captured_output):
            model, pred_mean, pred_var = run_chg_experiment()

        # Check outputs
        self.assertIsInstance(model, CHG)
        self.assertEqual(pred_mean.shape, (25,))
        self.assertEqual(pred_var.shape, (25,))
        self.assertTrue(np.all(pred_var > 0))

        # Check that performance metrics were printed
        output = captured_output.getvalue()
        self.assertIn("CHG Performance", output)
        self.assertIn("RMSE", output)
        self.assertIn("MAE", output)
|
| 177 |
+
|
| 178 |
+
if __name__ == "__main__":
    # Allow running this test module directly: `python tests/test_chg.py`.
    unittest.main()
|