"""Reward shaping logic for RL-ready code analysis scores."""
from __future__ import annotations

import math

from schemas.response import ScoreBreakdown
class RewardService:
    """Compute reward scores from model, domain, lint, and complexity signals."""

    @staticmethod
    def _clamp_unit(value: float) -> float:
        """Clamp *value* to the closed interval [0.0, 1.0], mapping NaN to 0.0."""
        # Every ordering comparison with NaN is False, so the plain idiom
        # ``max(0.0, min(1.0, nan))`` evaluates to 1.0. Handle NaN explicitly
        # so an undefined signal can never become a maximal one.
        if math.isnan(value):
            return 0.0
        return max(0.0, min(1.0, value))

    def compute(
        self,
        *,
        ml_score: float,
        domain_score: float,
        lint_score: float,
        complexity_penalty: float,
    ) -> ScoreBreakdown:
        """Apply dynamic reward shaping based on quality, errors, and completion.

        Three intermediate signals are derived from the inputs, each clamped
        to [0.0, 1.0]:

        * ``quality_signal``: weighted blend of the three scores
          (0.45 ml, 0.30 domain, 0.25 lint).
        * ``error_reduction_signal``: ``lint_score`` reduced by 0.6 times
          ``complexity_penalty``.
        * ``completion_signal``: arithmetic mean of the three scores.

        The final ``reward`` is a weighted sum of those signals plus direct
        ``ml_score`` and ``domain_score`` terms, minus 0.15 times
        ``complexity_penalty``, again clamped to [0.0, 1.0].

        Args:
            ml_score: Model-derived score (presumably in [0, 1]; not enforced
                here).
            domain_score: Domain-specific score (same assumption).
            lint_score: Lint-derived score (same assumption).
            complexity_penalty: Penalty subtracted from the error-reduction
                signal and from the final reward.

        Returns:
            A ``ScoreBreakdown`` carrying the four inputs, the three derived
            signals and the final reward, each rounded to 4 decimal places.
            The inputs are echoed back rounded but otherwise unmodified (they
            are not clamped). A NaN input makes the signals that depend on it,
            and the reward, 0.0 instead of saturating at 1.0.
        """
        clamp = self._clamp_unit

        quality_signal = clamp(
            (0.45 * ml_score) + (0.3 * domain_score) + (0.25 * lint_score)
        )
        error_reduction_signal = clamp(lint_score - (0.6 * complexity_penalty))
        completion_signal = clamp((ml_score + domain_score + lint_score) / 3.0)

        # The complexity penalty is applied twice on purpose-preserving terms:
        # once inside error_reduction_signal and once directly on the total.
        reward = clamp(
            (0.35 * quality_signal)
            + (0.25 * completion_signal)
            + (0.2 * error_reduction_signal)
            + (0.1 * ml_score)
            + (0.1 * domain_score)
            - (0.15 * complexity_penalty)
        )

        return ScoreBreakdown(
            ml_score=round(ml_score, 4),
            domain_score=round(domain_score, 4),
            lint_score=round(lint_score, 4),
            complexity_penalty=round(complexity_penalty, 4),
            quality_signal=round(quality_signal, 4),
            error_reduction_signal=round(error_reduction_signal, 4),
            completion_signal=round(completion_signal, 4),
            reward=round(reward, 4),
        )
|