File size: 2,942 Bytes
51457b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8412998
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51457b7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
from __future__ import annotations

import ast
import statistics
from collections import deque

from thefuzz import fuzz


# Global solve rate history buffer: {seed_id: deque(maxlen=20)}
solve_rate_history: dict[str, deque[float]] = {}


def reset_reward_history() -> None:
    solve_rate_history.clear()


def get_solve_rate(seed_id: str) -> float:
    if seed_id not in solve_rate_history or not solve_rate_history[seed_id]:
        return 0.5
    return statistics.mean(solve_rate_history[seed_id])


def record_solve_result(seed_id: str, solved: bool) -> None:
    if seed_id not in solve_rate_history:
        solve_rate_history[seed_id] = deque(maxlen=20)
    solve_rate_history[seed_id].append(1.0 if solved else 0.0)


def is_effectively_unchanged(original_code: str, candidate_code: str) -> bool:
    try:
        return ast.dump(ast.parse(original_code)) == ast.dump(ast.parse(candidate_code))
    except SyntaxError:
        return original_code.strip() == candidate_code.strip()


def compute_ast_distance(original_code: str, mutated_code: str) -> float:
    """

    Computes the string similarity distance between the AST dumps of the original

    and mutated code using thefuzz (Levenshtein based).

    Zero edits = 0 score.

    Targeted (small) edit = high plausibility (closer to 1.0).

    Random / wide corruption = low score.

    """
    try:
        orig_ast = ast.dump(ast.parse(original_code))
        mut_ast = ast.dump(ast.parse(mutated_code))
    except SyntaxError:
        return 0.0

    ratio = fuzz.ratio(orig_ast, mut_ast)

    # The Fuzz defaults to percentage (0 to 100)
    # Empirical calibration: simple AST mutations typically result in
    # a fuzz ratio of 85-98%.
    if 85 <= ratio:
        return 1.0
    if 50 <= ratio < 85:
        return max(0.1, (ratio - 50) / 35.0)
    return 0.0


def compute_proposer_reward(meta: dict) -> float:
    if meta.get("syntax_error", False):
        return -0.5

    if meta.get("unsafe_code", False):
        return -0.5

    if meta.get("unchanged_code", False):
        return 0.0

    if meta.get("tests_passed", True):
        return 0.0

    if meta.get("changed_but_passing", False):
        return -0.1

    plausibility_bonus = meta.get("plausibility_score", 0.0)
    learnability_bonus = 0.0
    solve_rate = get_solve_rate(meta["seed_id"])
    if 0.2 <= solve_rate <= 0.8:
        learnability_bonus = 1.0

    return 1.0 + plausibility_bonus + learnability_bonus


def compute_solver_reward(meta: dict) -> float:
    solved = meta.get("tests_passed", False)
    syntax_error = meta.get("syntax_error", True)
    unsafe_code = meta.get("unsafe_code", False)

    record_solve_result(meta["seed_id"], solved and not syntax_error and not unsafe_code)

    if syntax_error or unsafe_code:
        return -0.5
    if solved:
        return 1.0
    return 0.0