"""
AETHER-Micro Self-Evaluation Head

4-dimensional quality assessment for RLP training
"""
| |
|
| | import torch |
| | import torch.nn as nn |
| |
|
| | from .configuration_aether_micro import AETHERMicroConfig |
| |
|
| |
|
class AETHERMicroSelfEvalHead(nn.Module):
    """Self-evaluation head producing multi-dimensional quality scores for RLP training.

    Hidden states pass through a shared two-layer GELU encoder
    (hidden_size -> 1024 -> 512) and then through two independent
    linear projections:

    * ``quality_head``: 512 -> ``self_eval_dims`` per-dimension scores
      (factuality, coherence, completeness, specificity), each squashed
      into (0, 1) by a sigmoid.
    * ``overall_head``: 512 -> 1 aggregated quality score in (0, 1),
      used as the RLP reward signal.

    Parameter count is on the order of 1M+ for typical configs (see
    ``count_self_eval_parameters`` for the exact figure).
    """

    def __init__(self, config: AETHERMicroConfig):
        super().__init__()
        self.config = config
        self.hidden_size = config.hidden_size
        self.self_eval_dims = config.self_eval_dims

        # Shared trunk: hidden_size -> 1024 -> 512 with GELU activations.
        self.encoder = nn.Sequential(
            nn.Linear(self.hidden_size, 1024),
            nn.GELU(),
            nn.Linear(1024, 512),
            nn.GELU(),
        )

        # Per-dimension quality logits (sigmoid applied in forward).
        self.quality_head = nn.Linear(512, self.self_eval_dims)

        # Single aggregated quality logit (sigmoid applied in forward).
        self.overall_head = nn.Linear(512, 1)

    def forward(self, hidden_states: torch.Tensor) -> tuple:
        """Score every position of ``hidden_states``.

        Args:
            hidden_states: Tensor of shape (batch_size, seq_length, hidden_size).

        Returns:
            Tuple ``(quality, overall)`` where ``quality`` has shape
            (batch_size, seq_length, self_eval_dims) in the order
            [factuality, coherence, completeness, specificity] and
            ``overall`` has shape (batch_size, seq_length, 1);
            all values lie in (0, 1).
        """
        features = self.encoder(hidden_states)
        per_dim_scores = torch.sigmoid(self.quality_head(features))
        aggregate_score = torch.sigmoid(self.overall_head(features))
        return per_dim_scores, aggregate_score
| |
|
| |
|
| | |
| | |
| | |
| |
|
def count_self_eval_parameters(config: AETHERMicroConfig) -> int:
    """Return the total parameter count of the self-evaluation head.

    Instantiates an ``AETHERMicroSelfEvalHead`` for ``config`` and sums
    the element counts of all its parameters (weights *and* biases —
    ``nn.Linear`` has a bias by default).

    Breakdown assuming hidden_size = 896 and self_eval_dims = 4:
        hidden_size -> 1024:  896 * 1024 + 1024 =   918,528
        1024 -> 512:         1024 *  512 +  512 =   524,800
        512 -> 4 (quality):   512 *    4 +    4 =     2,052
        512 -> 1 (overall):   512 *    1 +    1 =       513
        Total:                                    1,445,893

    NOTE(review): the originally advertised "~600K" does not match this
    architecture (~1.45M). The legacy NIPA model structure should be
    re-checked before relying on the 600K figure.
    """
    head = AETHERMicroSelfEvalHead(config)
    return sum(p.numel() for p in head.parameters())
| |
|
| |
|
if __name__ == "__main__":
    # Quick sanity check: report the self-eval head's parameter count.
    # AETHERMicroConfig is already imported at module top — the previous
    # duplicate relative import here was redundant (and relative imports
    # fail anyway when the file is run directly rather than via
    # `python -m <package>.<module>`).
    config = AETHERMicroConfig()
    param_count = count_self_eval_parameters(config)
    print(f"Self-Evaluation Head Parameters: {param_count:,}")
| |
|