File size: 7,911 Bytes
1588266
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# cell 7 author.py – Final production version: stateful, evidence-driven, belief tracking

import re
import ast
from dataclasses import dataclass, field
from typing import List, Dict, Any, Optional

@dataclass
class PersonaAuthor:
    """Simulate a human developer being persuaded to accept a proposed fix.

    The author keeps a continuous belief (``_confidence``) updated via an
    exponential moving average of a per-round evidence score.  Evidence is
    drawn from test results, lint output, documentation hits, the quality of
    the agent's explanation, and the magnitude of the proposed code change.
    The author accepts once confidence crosses a personality-dependent
    threshold, or concedes after ``max_persuasion_rounds`` pushbacks.
    """

    personality: str = "defensive"   # one of: defensive | junior | collaborative
    max_persuasion_rounds: int = 5   # concede (accept) after this many pushbacks

    # Evidence weights.  NOTE(review): together with weight_code_change these
    # sum to 1.1, so a perfect submission over-shoots and is clamped to 1.0
    # by the [0, 1] clamp in respond().  Kept as-is to preserve behavior.
    weight_test_pass: float = 0.5
    weight_lint_clean: float = 0.2
    weight_doc_found: float = 0.15
    weight_explanation_quality: float = 0.15

    # Per-personality confidence thresholds for accepting a fix.
    thresholds: Dict[str, float] = field(default_factory=lambda: {
        "defensive": 0.7,
        "junior": 0.3,
        "collaborative": 0.5,
    })

    # Internal negotiation state (cleared by reset()).
    _confidence: float = 0.0          # current belief in [0, 1]
    _conversation: List[Dict[str, Any]] = field(default_factory=list)
    _pushback_count: int = 0          # rejections issued so far
    _last_evidence_score: float = 0.0
    _stagnation_counter: int = 0      # consecutive rounds without improvement

    # Weight for code-change magnitude: smaller, more surgical fixes score
    # higher.  Declared after the private fields (it was originally added
    # mid-class) so the generated __init__ keeps the same parameter order.
    weight_code_change: float = 0.1

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------
    def __post_init__(self):
        # Normalise internal state even if a caller passed explicit values
        # for the private fields.
        self.reset()

    def reset(self):
        """Clear all negotiation state so the instance can be reused."""
        self._confidence = 0.0
        self._conversation.clear()
        self._pushback_count = 0
        self._last_evidence_score = 0.0
        self._stagnation_counter = 0

    # ------------------------------------------------------------------
    # Main interaction
    # ------------------------------------------------------------------
    def respond(self,
                agent_comment: str = "",
                agent_question: str = "",
                test_results: Optional[str] = None,
                lint_results: Optional[str] = None,
                doc_results: Optional[str] = None,
                proposed_fix: Optional[str] = None,
                original_code: Optional[str] = None) -> str:
        """Process one persuasion round and return the author's reply.

        Args:
            agent_comment: Free-text argument made by the agent.
            agent_question: Optional follow-up question from the agent.
            test_results: Raw test output (e.g. "7/10" or "true"/"false").
            lint_results: Raw lint output; occurrences of "error" are counted.
            doc_results: Documentation search output; "no relevant" means none.
            proposed_fix: New source code, compared against original_code.
            original_code: Source code before the proposed fix.

        Returns:
            Either the acceptance message or a targeted pushback string.
        """
        # Remember the round for later inspection.
        self._conversation.append({
            "comment": agent_comment,
            "question": agent_question,
            "test": test_results,
            "lint": lint_results,
            "docs": doc_results
        })

        # Turn raw tool outputs into structured evidence.
        evidence = self._extract_evidence(test_results, lint_results, doc_results)

        # Relative size of the proposed change (0.0 = identical or
        # unparseable, 1.0 = completely different AST size).
        code_change = 0.0
        if proposed_fix and original_code:
            code_change = self._inspect_code(proposed_fix, original_code)
            evidence["code_change"] = code_change

        # How well did the agent explain itself?
        text = (agent_comment + " " + agent_question).lower()
        explanation_score = self._score_explanation(text)

        # Weighted evidence score; surgical fixes are rewarded via the
        # (1 - code_change) term, and >= 10 lint errors zero the lint term.
        evidence_score = (
            self.weight_test_pass * evidence.get("test_pass_ratio", 0.0) +
            self.weight_lint_clean * (1 - min(1.0, evidence.get("lint_errors", 0) / 10)) +
            self.weight_doc_found * (1.0 if evidence.get("doc_found") else 0.0) +
            self.weight_explanation_quality * explanation_score +
            self.weight_code_change * (1.0 - code_change)
        )
        evidence_score = max(0.0, min(1.0, evidence_score))

        # Track whether the agent is actually making progress round-to-round.
        delta = evidence_score - self._last_evidence_score
        self._last_evidence_score = evidence_score
        if delta > 0.05:
            self._stagnation_counter = 0
        else:
            self._stagnation_counter += 1

        # Exponential-moving-average belief update (momentum keeps the
        # author from flipping on a single good round).
        lr = 0.3
        self._confidence = (1 - lr) * self._confidence + lr * evidence_score

        # Two stagnant rounds in a row erode confidence.
        if self._stagnation_counter >= 2:
            self._confidence *= 0.9

        # Accept when convinced, or concede after too many rounds.
        threshold = self.thresholds.get(self.personality, 0.5)
        if self._confidence >= threshold or self._pushback_count >= self.max_persuasion_rounds:
            return "Alright, I'm convinced. Let's proceed with your fix."

        # Otherwise push back with the most relevant objection.
        self._pushback_count += 1
        return self._generate_pushback(evidence, text)

    # ------------------------------------------------------------------
    # Evidence extraction
    # ------------------------------------------------------------------
    def _extract_evidence(self, test_results, lint_results, doc_results):
        """Parse raw tool output strings into a structured evidence dict.

        Returns a dict with keys "test_pass_ratio" (float in [0, 1]),
        "lint_errors" (int count) and "doc_found" (bool).
        """
        evidence = {
            "test_pass_ratio": 0.0,
            "lint_errors": 0,
            "doc_found": False
        }

        # Test results: prefer an explicit "passed/total" ratio, otherwise
        # fall back to a boolean "true"/"false" anywhere in the output.
        if test_results:
            match = re.search(r'(\d+)\s*/\s*(\d+)', test_results)
            if match:
                p, t = int(match.group(1)), int(match.group(2))
                evidence["test_pass_ratio"] = p / t if t else 0.0
            elif "true" in test_results.lower():
                evidence["test_pass_ratio"] = 1.0
            elif "false" in test_results.lower():
                evidence["test_pass_ratio"] = 0.0

        # Lint: count occurrences of the word "error".
        if lint_results:
            evidence["lint_errors"] = len(re.findall(r'error', lint_results.lower()))

        # Docs: anything other than an explicit "no relevant" counts as found.
        if doc_results and "no relevant" not in doc_results.lower():
            evidence["doc_found"] = True

        return evidence

    # ------------------------------------------------------------------
    # Explanation scoring
    # ------------------------------------------------------------------
    def _score_explanation(self, text: str) -> float:
        """Heuristically score explanation quality in [0, 1].

        Rewards causal language, references to tests/examples, length,
        and mention of the error/fix.  Maximum attainable raw score is 0.8.
        """
        score = 0.0

        if "because" in text or "therefore" in text:
            score += 0.3
        if "test" in text or "example" in text:
            score += 0.2
        if len(text.split()) > 30:
            score += 0.2
        if "error" in text or "fix" in text:
            score += 0.1

        return min(1.0, score)

    # ------------------------------------------------------------------
    # Code inspection
    # ------------------------------------------------------------------
    def _inspect_code(self, new_code: str, old_code: str) -> float:
        """Return the relative AST-size change between two code strings.

        0.0 means identical size (or either snippet failed to parse);
        1.0 means the node count at least doubled or halved.
        """
        try:
            t1 = ast.parse(old_code)
            t2 = ast.parse(new_code)

            n1 = len(list(ast.walk(t1)))
            n2 = len(list(ast.walk(t2)))

            change = abs(n2 - n1) / max(n1, 1)
            return min(1.0, change)
        # ast.parse raises SyntaxError for bad code and ValueError for
        # source containing null bytes; a bare except here would also
        # swallow KeyboardInterrupt/SystemExit.
        except (SyntaxError, ValueError):
            return 0.0

    # ------------------------------------------------------------------
    # Pushback generator
    # ------------------------------------------------------------------
    def _generate_pushback(self, evidence, text):
        """Return the highest-priority objection given current evidence.

        Checks are ordered: failing tests > lint errors > missing docs >
        missing causal explanation > brevity > generic dissatisfaction.
        """
        if evidence["test_pass_ratio"] < 0.5:
            return "Tests are still failing. Show a passing case."

        if evidence["lint_errors"] > 0:
            return f"There are {evidence['lint_errors']} lint errors. Fix them."

        if not evidence["doc_found"]:
            return "Provide documentation or reference."

        if "because" not in text:
            return "Explain why this works."

        if len(text.split()) < 20:
            return "Too brief. Expand your reasoning."

        return "Not convinced yet. Give a concrete example."

    # ------------------------------------------------------------------
    # Score
    # ------------------------------------------------------------------
    def get_negotiation_score(self) -> float:
        """Return confidence penalised by pushbacks, clamped to [0, 1].

        Each pushback costs 0.1, capped at 3 pushbacks (max penalty 0.3).
        """
        penalty = 0.1 * min(3, self._pushback_count)
        return max(0.0, min(1.0, self._confidence - penalty))