Spaces:
Running
Running
Upload 7 files
Browse files- README-3.md +329 -0
- app.py +271 -0
- edges.py +57 -0
- main.py +124 -0
- nodes.py +532 -0
- requirements.txt +6 -3
- state.py +28 -0
README-3.md
ADDED
|
@@ -0,0 +1,329 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 🤖 Autonomous Python Coding Agent
|
| 2 |
+
|
| 3 |
+
> **A production-grade, self-healing multi-agent pipeline that doesn't just generate Python code — it autonomously writes, validates, tests, secures, benchmarks, and reflects on its own output before shipping.**
|
| 4 |
+
|
| 5 |
+
[](https://python.org)
|
| 6 |
+
[](https://github.com/langchain-ai/langgraph)
|
| 7 |
+
[](https://groq.com)
|
| 8 |
+
[](https://chromadb.com)
|
| 9 |
+
[](https://streamlit.io)
|
| 10 |
+
[](LICENSE)
|
| 11 |
+
[](https://huggingface.co/spaces/krishpatel/autonomous-coding-agent)
|
| 12 |
+
|
| 13 |
+
---
|
| 14 |
+
|
| 15 |
+
## π Live Demo
|
| 16 |
+
|
| 17 |
+
**[▶ Try it on Hugging Face Spaces](https://huggingface.co/spaces/krishpatel/autonomous-coding-agent)**
|
| 18 |
+
|
| 19 |
+
---
|
| 20 |
+
|
| 21 |
+
## 📸 Demo
|
| 22 |
+
|
| 23 |
+

|
| 24 |
+
|
| 25 |
+
---
|
| 26 |
+
|
| 27 |
+
## 🔥 What makes this different from just using ChatGPT?
|
| 28 |
+
|
| 29 |
+
| Feature | ChatGPT / Basic Agent | This Agent |
|
| 30 |
+
|---|---|---|
|
| 31 |
+
| Code generation | ✅ | ✅ |
|
| 32 |
+
| Syntax validation | ❌ Run and hope | ✅ AST parse before running |
|
| 33 |
+
| Test cases | ❌ Manual | ✅ Auto-generated by agent |
|
| 34 |
+
| Stress testing | ❌ | ✅ 500+ random inputs via Hypothesis |
|
| 35 |
+
| Memory | ❌ Stateless | ✅ ChromaDB learns from past bugs |
|
| 36 |
+
| Security audit | ❌ | ✅ Detects eval, exec, hardcoded keys |
|
| 37 |
+
| Performance check | ❌ | ✅ Benchmarks 1000 runs, rejects slow code |
|
| 38 |
+
| Self-review | ❌ | ✅ Agent scores own confidence 1-10 |
|
| 39 |
+
| Self-healing | ❌ | ✅ Loops back and fixes failures automatically |
|
| 40 |
+
| Separate retry counters | ❌ | ✅ Per-node counters prevent pipeline blockage |
|
| 41 |
+
|
| 42 |
+
---
|
| 43 |
+
|
| 44 |
+
## π Key Metrics
|
| 45 |
+
|
| 46 |
+
| Metric | Value |
|
| 47 |
+
|---|---|
|
| 48 |
+
| Pipeline nodes | 13 |
|
| 49 |
+
| Verification layers | 5 (AST → Tests → Hypothesis → Security → Complexity) |
|
| 50 |
+
| Max retries (debugger) | 3 |
|
| 51 |
+
| Max retries (security, complexity) | 2 each — independent counters |
|
| 52 |
+
| Hypothesis test cases | 500+ random inputs per run |
|
| 53 |
+
| Benchmark iterations | 1,000 runs |
|
| 54 |
+
| Performance threshold | < 5ms per call |
|
| 55 |
+
| Memory backend | ChromaDB vector similarity search |
|
| 56 |
+
| LLM | Llama 3.1 8B Instant via Groq |
|
| 57 |
+
| Avg pipeline runtime | ~20–40 seconds |
|
| 58 |
+
| Lines of code | ~1,000 across 5 files |
|
| 59 |
+
|
| 60 |
+
---
|
| 61 |
+
|
| 62 |
+
## 🏗️ Architecture — 13-Node Pipeline
|
| 63 |
+
|
| 64 |
+
```
|
| 65 |
+
User Input (Python Task)
         │
         ▼
    ┌─────────┐
    │ Planner │  ── Breaks task into blueprint
    └────┬────┘
         │
         ▼
     ┌───────┐
     │ Coder │  ── Writes code using plan + ChromaDB memory
     └───┬───┘
         │
         ▼
 ┌───────────────┐
 │ AST Validator │  ── Syntax + hallucinated imports + type hints
 └───────┬───────┘     (no execution needed — milliseconds)
         │
    Pass │ Fail ──► Debugger ──► back to AST
         ▼
 ┌────────────────┐
 │ Test Generator │  ── Auto-generates pytest-style test cases
 └───────┬────────┘
         │
         ▼
     ┌────────┐
     │ Tester │  ── Runs code + generated tests in sandbox
     └───┬────┘
         │
    Pass │ Fail ──► Debugger (max 3 retries)
         ▼
   ┌────────────┐
   │ Hypothesis │  ── 500+ random inputs, property-based testing
   └─────┬──────┘     (never blocks pipeline — informational only)
         │
         ▼
   ┌───────────┐
   │ Benchmark │  ── Runs 1000x, rejects if > 5ms/call
   └─────┬─────┘
         │
         ▼
    ┌──────────┐
    │ Security │  ── Detects eval/exec/hardcoded secrets
    └────┬─────┘     (own retry counter — max 2)
         │
         ▼
   ┌────────────┐
   │ Complexity │  ── Line count + nesting depth + LLM score/10
   └─────┬──────┘     (own retry counter — max 2)
         │
         ▼
┌─────────────────┐
│ Self Reflection │  ── Agent scores own confidence 1-10
└────────┬────────┘     Rewrites if confidence < 7
         │
         ▼
    ┌──────────┐
    │ Reviewer │  ── Polishes + docstrings + type hints
    └────┬─────┘
         │
         ▼
   ┌───────────┐
   │ Explainer │  ── Writes human-readable explanation
   └─────┬─────┘
         │
         ▼
       OUTPUT
Final Code + Explanation
|
| 132 |
+
```
|
| 133 |
+
|
| 134 |
+
---
|
| 135 |
+
|
| 136 |
+
## π Project Structure
|
| 137 |
+
|
| 138 |
+
```
|
| 139 |
+
autonomous-coding-agent/
|
| 140 |
+
├── app.py              ← Streamlit UI
├── main.py             ← Graph builder + entry point
├── state.py            ← Shared TypedDict state (whiteboard)
├── nodes.py            ← All 13 node functions + LLM + ChromaDB
├── edges.py            ← All 7 conditional route functions
├── requirements.txt    ← Dependencies
└── README.md
|
| 147 |
+
```
|
| 148 |
+
|
| 149 |
+
---
|
| 150 |
+
|
| 151 |
+
## ⚡ Run Locally
|
| 152 |
+
|
| 153 |
+
### Prerequisites
|
| 154 |
+
- Python 3.11+
|
| 155 |
+
- Groq API key — get one free at [console.groq.com](https://console.groq.com)
|
| 156 |
+
|
| 157 |
+
### Step 1 — Clone the repo
|
| 158 |
+
```bash
|
| 159 |
+
git clone https://github.com/krishpatel/autonomous-coding-agent.git
|
| 160 |
+
cd autonomous-coding-agent
|
| 161 |
+
```
|
| 162 |
+
|
| 163 |
+
### Step 2 — Create virtual environment
|
| 164 |
+
```bash
|
| 165 |
+
python -m venv venv
|
| 166 |
+
|
| 167 |
+
# Mac/Linux
|
| 168 |
+
source venv/bin/activate
|
| 169 |
+
|
| 170 |
+
# Windows
|
| 171 |
+
venv\Scripts\activate
|
| 172 |
+
```
|
| 173 |
+
|
| 174 |
+
### Step 3 — Install dependencies
|
| 175 |
+
```bash
|
| 176 |
+
pip install -r requirements.txt
|
| 177 |
+
```
|
| 178 |
+
|
| 179 |
+
### Step 4 — Set your API key
|
| 180 |
+
```bash
|
| 181 |
+
# Mac/Linux
|
| 182 |
+
export GROQ_API_KEY=your_groq_api_key_here
|
| 183 |
+
|
| 184 |
+
# Windows
|
| 185 |
+
set GROQ_API_KEY=your_groq_api_key_here
|
| 186 |
+
```
|
| 187 |
+
|
| 188 |
+
Or create a `.env` file:
|
| 189 |
+
```bash
|
| 190 |
+
echo "GROQ_API_KEY=your_groq_api_key_here" > .env
|
| 191 |
+
```
|
| 192 |
+
|
| 193 |
+
### Step 5 — Run CLI (no UI)
|
| 194 |
+
```bash
|
| 195 |
+
python main.py
|
| 196 |
+
```
|
| 197 |
+
|
| 198 |
+
### Step 6 — Run Streamlit UI
|
| 199 |
+
```bash
|
| 200 |
+
streamlit run app.py
|
| 201 |
+
```
|
| 202 |
+
|
| 203 |
+
Open [http://localhost:8501](http://localhost:8501) in your browser.
|
| 204 |
+
|
| 205 |
+
---
|
| 206 |
+
|
| 207 |
+
## 🐳 Run with Docker (optional)
|
| 208 |
+
|
| 209 |
+
```dockerfile
|
| 210 |
+
# Dockerfile
|
| 211 |
+
FROM python:3.11-slim
|
| 212 |
+
WORKDIR /app
|
| 213 |
+
COPY requirements.txt .
|
| 214 |
+
RUN pip install -r requirements.txt
|
| 215 |
+
COPY . .
|
| 216 |
+
EXPOSE 8501
|
| 217 |
+
CMD ["streamlit", "run", "app.py", "--server.port=8501"]
|
| 218 |
+
```
|
| 219 |
+
|
| 220 |
+
```bash
|
| 221 |
+
# Build
|
| 222 |
+
docker build -t coding-agent .
|
| 223 |
+
|
| 224 |
+
# Run
|
| 225 |
+
docker run -e GROQ_API_KEY=your_key -p 8501:8501 coding-agent
|
| 226 |
+
```
|
| 227 |
+
|
| 228 |
+
---
|
| 229 |
+
|
| 230 |
+
## π Deploy to Hugging Face Spaces
|
| 231 |
+
|
| 232 |
+
```bash
|
| 233 |
+
# Install HF CLI
|
| 234 |
+
pip install huggingface_hub
|
| 235 |
+
|
| 236 |
+
# Login
|
| 237 |
+
huggingface-cli login
|
| 238 |
+
|
| 239 |
+
# Create space and push
|
| 240 |
+
huggingface-cli repo create autonomous-coding-agent --type space --space_sdk streamlit
|
| 241 |
+
git remote add hf https://huggingface.co/spaces/YOUR_USERNAME/autonomous-coding-agent
|
| 242 |
+
git push hf main
|
| 243 |
+
```
|
| 244 |
+
|
| 245 |
+
Then add your secret in HF Spaces Settings:
|
| 246 |
+
```
|
| 247 |
+
GROQ_API_KEY = your_key_here
|
| 248 |
+
```
|
| 249 |
+
|
| 250 |
+
---
|
| 251 |
+
|
| 252 |
+
## 🛠️ Tech Stack
|
| 253 |
+
|
| 254 |
+
```
|
| 255 |
+
LangGraph   → Stateful multi-agent graph orchestration
Groq API    → LLM inference (Llama 3.1 8B Instant)
ChromaDB    → Vector database for bug fix memory
Hypothesis  → Property-based stress testing
Streamlit   → Production UI
subprocess  → Sandboxed isolated code execution
ast         → Static code analysis without execution
hashlib     → Deterministic ChromaDB IDs
importlib   → Real-time import hallucination detection
|
| 264 |
+
```
|
| 265 |
+
|
| 266 |
+
---
|
| 267 |
+
|
| 268 |
+
## 💡 Key Engineering Decisions
|
| 269 |
+
|
| 270 |
+
### Why LangGraph over plain LangChain?
|
| 271 |
+
LangGraph handles **cyclic workflows** — when tests fail, the agent loops back through the debugger and restarts verification from AST. LangChain's linear chains can't do this cleanly.
|
| 272 |
+
|
| 273 |
+
### Why AST validation before running?
|
| 274 |
+
Running broken code wastes subprocess time. AST parsing catches syntax errors in **milliseconds** without execution — like a proofreader checking spelling before printing.
|
| 275 |
+
|
| 276 |
+
### Why Hypothesis for testing?
|
| 277 |
+
Hand-written tests only cover cases you think of. Hypothesis **auto-generates 500+ random inputs** and verifies properties that should always hold. Catches edge cases no human would write.
|
| 278 |
+
|
| 279 |
+
### Why separate retry counters per node?
|
| 280 |
+
One shared counter caused security failing 3 times to kill the entire pipeline before the debugger got its attempts. Separate counters for security and complexity mean each node fails independently without blocking others.
|
| 281 |
+
|
| 282 |
+
### Why hashlib instead of Python's hash()?
|
| 283 |
+
Python's `hash()` is **randomized every session** for security. Same error → different ChromaDB ID → agent can never retrieve past fixes. `hashlib.md5` is deterministic across all sessions.
|
| 284 |
+
|
| 285 |
+
### Why combined Reviewer + Explainer?
|
| 286 |
+
Two separate LLM calls for polishing and explaining wasted ~8 seconds. One combined call with structured output (`FINAL_CODE:` / `EXPLANATION:`) saves an entire API round trip.
|
| 287 |
+
|
| 288 |
+
---
|
| 289 |
+
|
| 290 |
+
## π Real Bugs Found and Fixed
|
| 291 |
+
|
| 292 |
+
**Bug 1 — False Positive in Tester**
|
| 293 |
+
`returncode == 0` doesn't mean the function was called. A file that only defines functions exits successfully but prints nothing. Fixed by checking `stdout` is not empty after successful run.
|
| 294 |
+
|
| 295 |
+
**Bug 2 — ChromaDB Hash Randomization**
|
| 296 |
+
Python's `hash()` is session-randomized. Same bug → different ID every run → memory retrieval never works. Fixed with `hashlib.md5().hexdigest()[:8]` for deterministic cross-session IDs.
|
| 297 |
+
|
| 298 |
+
**Bug 3 — Python 3.11 F-string Backslash**
|
| 299 |
+
Python 3.11 and earlier don't allow backslashes inside f-string expressions (the restriction was lifted in Python 3.12). The benchmark node embedded code inside f-strings. Fixed by using string concatenation instead.
|
| 300 |
+
|
| 301 |
+
**Bug 4 — Shared Retry Counter**
|
| 302 |
+
One `retries` counter shared across all nodes caused security/complexity failures to consume the debugger's retry budget. Fixed by adding `security_retries` and `complexity_retries` as independent counters.
|
| 303 |
+
|
| 304 |
+
---
|
| 305 |
+
|
| 306 |
+
## π Environment Variables
|
| 307 |
+
|
| 308 |
+
| Variable | Required | Description |
|
| 309 |
+
|---|---|---|
|
| 310 |
+
| `GROQ_API_KEY` | ✅ Yes | Get free at console.groq.com |
|
| 311 |
+
| `GITHUB_TOKEN` | ❌ No | Only needed for AutoReview AI project |
|
| 312 |
+
|
| 313 |
+
---
|
| 314 |
+
|
| 315 |
+
## π Resume Line
|
| 316 |
+
|
| 317 |
+
> **Autonomous Python Coding Agent** | LangGraph · Groq · ChromaDB · Streamlit
|
| 318 |
+
> Built a 13-node self-healing pipeline with 5-layer verification — AST validation, auto-generated tests, Hypothesis property testing (500+ random inputs), security audit, and self-reflection confidence scoring. ChromaDB vector memory enables cross-session bug fix learning. Deployed on Hugging Face Spaces.
|
| 319 |
+
|
| 320 |
+
---
|
| 321 |
+
|
| 322 |
+
## 👨‍💻 Author
|
| 323 |
+
|
| 324 |
+
**Krish Patel** — AI Engineer
|
| 325 |
+
[GitHub](https://github.com/krishpatel) · [LinkedIn](https://linkedin.com/in/krishpatel) · [Live Demo](https://huggingface.co/spaces/krishpatel/autonomous-coding-agent)
|
| 326 |
+
|
| 327 |
+
---
|
| 328 |
+
|
| 329 |
+
*Built as part of AI Engineer internship portfolio — Bangalore, 2026*
|
app.py
ADDED
|
@@ -0,0 +1,271 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# app.py β Streamlit UI for Autonomous Python Coding Agent
|
| 2 |
+
import streamlit as st
|
| 3 |
+
import os
|
| 4 |
+
import sys
|
| 5 |
+
|
| 6 |
+
st.set_page_config(
|
| 7 |
+
page_title="Autonomous Python Coding Agent",
|
| 8 |
+
page_icon="π€",
|
| 9 |
+
layout="wide"
|
| 10 |
+
)
|
| 11 |
+
|
| 12 |
+
st.markdown("""
|
| 13 |
+
<style>
|
| 14 |
+
.node-card {
|
| 15 |
+
border-radius: 10px;
|
| 16 |
+
padding: 12px 16px;
|
| 17 |
+
margin: 5px 0;
|
| 18 |
+
border-left: 4px solid #444;
|
| 19 |
+
font-size: 14px;
|
| 20 |
+
background: #1e2130;
|
| 21 |
+
}
|
| 22 |
+
.node-pass { border-left-color: #00cc88; }
|
| 23 |
+
.node-fail { border-left-color: #ff4444; }
|
| 24 |
+
.node-skip { border-left-color: #ffaa00; }
|
| 25 |
+
.title-grad {
|
| 26 |
+
background: linear-gradient(90deg, #00cc88, #0088ff);
|
| 27 |
+
-webkit-background-clip: text;
|
| 28 |
+
-webkit-text-fill-color: transparent;
|
| 29 |
+
font-size: 2.2rem;
|
| 30 |
+
font-weight: 800;
|
| 31 |
+
}
|
| 32 |
+
</style>
|
| 33 |
+
""", unsafe_allow_html=True)
|
| 34 |
+
|
| 35 |
+
# ββ HEADER ββββββββββββββββββββββββββββββββ
|
| 36 |
+
st.markdown('<p class="title-grad">π€ Autonomous Python Coding Agent</p>', unsafe_allow_html=True)
|
| 37 |
+
st.markdown("**13-node LangGraph pipeline** Β· AST Validation Β· Property Testing Β· Security Audit Β· Self Reflection")
|
| 38 |
+
st.divider()
|
| 39 |
+
|
| 40 |
+
# ββ SIDEBAR βββββββββββββββββββββββββββββββ
|
| 41 |
+
with st.sidebar:
|
| 42 |
+
st.markdown("### βοΈ Pipeline Nodes")
|
| 43 |
+
st.markdown("""
|
| 44 |
+
1. π Planner
|
| 45 |
+
2. π» Coder
|
| 46 |
+
3. π³ AST Validator
|
| 47 |
+
4. 𧬠Test Generator
|
| 48 |
+
5. π§ͺ Tester
|
| 49 |
+
6. π² Hypothesis
|
| 50 |
+
7. β‘ Benchmarker
|
| 51 |
+
8. π§ Debugger
|
| 52 |
+
9. π Security Auditor
|
| 53 |
+
10. π Complexity Judge
|
| 54 |
+
11. πͺ Self Reflection
|
| 55 |
+
12. β¨ Reviewer
|
| 56 |
+
13. π Explainer
|
| 57 |
+
""")
|
| 58 |
+
st.divider()
|
| 59 |
+
st.markdown("### π§ What makes this different?")
|
| 60 |
+
st.markdown("""
|
| 61 |
+
- **AST parsing** catches bugs before running
|
| 62 |
+
- **Auto-generated tests** β no manual writing
|
| 63 |
+
- **Hypothesis** generates 500+ random inputs
|
| 64 |
+
- **ChromaDB memory** learns from past fixes
|
| 65 |
+
- **Self-reflection** β agent critiques itself
|
| 66 |
+
- **Separate retry counters** per node
|
| 67 |
+
""")
|
| 68 |
+
st.divider()
|
| 69 |
+
st.markdown("Built with `LangGraph` Β· `Groq` Β· `ChromaDB`")
|
| 70 |
+
|
| 71 |
+
# ββ HELPERS βββββββββββββββββββββββββββββββ
|
| 72 |
+
def node_card(icon, name, status, detail=""):
    """Render one pipeline-node status row as a coloured HTML card.

    Args:
        icon: Emoji shown in front of the node name.
        name: Human-readable node name.
        status: ``"pass"``, ``"fail"`` or ``"skip"``. Any other value is
            styled like ``"skip"`` and shown with an hourglass.
        detail: Optional short note appended after the name; omitted when
            falsy.
    """
    import html  # local import so this block does not depend on the file header

    css_class = {
        "pass": "node-pass",
        "fail": "node-fail",
        "skip": "node-skip",
    }.get(status, "node-skip")
    # The status glyphs were mis-encoded (the "pass" literal was even split
    # across two lines); they are restored as proper Unicode here.
    emoji = {"pass": "✅", "fail": "❌", "skip": "⏭️"}.get(status, "⏳")

    # The card is emitted with unsafe_allow_html=True and `detail` can carry
    # pipeline-generated text, so escape everything that is interpolated.
    safe_icon = html.escape(str(icon))
    safe_name = html.escape(str(name))
    detail_html = (
        f"<span style='color:#888;font-size:12px'> — {html.escape(str(detail))}</span>"
        if detail
        else ""
    )
    st.markdown(
        f'<div class="node-card {css_class}">{emoji} <b>{safe_icon} {safe_name}</b>{detail_html}</div>',
        unsafe_allow_html=True,
    )
|
| 80 |
+
|
| 81 |
+
def initial_state(task):
    """Build the blank pipeline state handed to ``graph.invoke``.

    Only ``task`` comes from the caller. Every other field starts at its
    empty value: ``""`` for text, ``0`` / ``0.0`` for counters and timings,
    ``False`` for the per-stage flags.
    """
    return dict(
        task=task,
        # Text produced by the individual nodes.
        plan="",
        code="",
        test_result="",
        error="",
        fixed_code="",
        explanation="",
        review="",
        final_code="",
        # Retry counters read by the routing functions.
        retries=0,
        security_retries=0,
        complexity_retries=0,
        # Per-stage verdict flags.
        passed=False,
        is_secure=False,
        is_simple=False,
        ast_valid=False,
        # Test / benchmark artefacts.
        generated_tests="",
        hypothesis_result="",
        benchmark_ms=0.0,
        # Self-reflection outcome.
        reflection_ok=False,
        reflection_notes="",
        confidence_score=0,
    )
|
| 106 |
+
|
| 107 |
+
# ββ EXAMPLES ββββββββββββββββββββββββββββββ
|
| 108 |
+
examples = [
|
| 109 |
+
"Write a Python function to find all prime numbers up to n",
|
| 110 |
+
"Write a Python function to find the second largest number in a list",
|
| 111 |
+
"Write a Python function to check if a string is a palindrome",
|
| 112 |
+
"Write a Python function to flatten a nested list",
|
| 113 |
+
"Write a Python function to find factorial using recursion",
|
| 114 |
+
]
|
| 115 |
+
def set_example_task(example_text):
    """Button callback: put the chosen example into the task text area.

    Writes ``example_text`` under the ``task_input`` session-state key, the
    same key the task ``st.text_area`` is bound to.
    """
    # Done inside the on_click callback so the value is in place before the
    # page redraws.
    st.session_state.task_input = example_text
|
| 118 |
+
|
| 119 |
+
# ββ INPUT βββββββββββββββββββββββββββββββββ
|
| 120 |
+
col1, col2 = st.columns([3, 1])
|
| 121 |
+
|
| 122 |
+
with col1:
|
| 123 |
+
task = st.text_area(
|
| 124 |
+
"π― Enter your Python task:",
|
| 125 |
+
placeholder="e.g. Write a Python function to find all prime numbers up to n",
|
| 126 |
+
height=100,
|
| 127 |
+
key="task_input"
|
| 128 |
+
)
|
| 129 |
+
|
| 130 |
+
with col2:
|
| 131 |
+
st.markdown("**π‘ Try an example:**")
|
| 132 |
+
for ex in examples:
|
| 133 |
+
# Instead of an 'if' statement, we attach the helper function to 'on_click'
|
| 134 |
+
st.button(
|
| 135 |
+
ex[:38] + "β¦",
|
| 136 |
+
key=ex,
|
| 137 |
+
use_container_width=True,
|
| 138 |
+
on_click=set_example_task, # Calls our helper function
|
| 139 |
+
args=(ex,) # Hands the example text to the helper function
|
| 140 |
+
)
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
if "selected_task" in st.session_state and not task:
|
| 144 |
+
task = st.session_state["selected_task"]
|
| 145 |
+
|
| 146 |
+
run_btn = st.button("βΆ Run Agent", type="primary", use_container_width=True, disabled=not bool(task))
|
| 147 |
+
|
| 148 |
+
# ββ RUN βββββββββββββββββββββββββββββββββββ
|
| 149 |
+
if run_btn and task:
|
| 150 |
+
st.divider()
|
| 151 |
+
st.markdown("### π Pipeline Running...")
|
| 152 |
+
|
| 153 |
+
try:
|
| 154 |
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
| 155 |
+
from main import graph
|
| 156 |
+
except Exception as e:
|
| 157 |
+
st.error(f"β Could not import agent: {e}")
|
| 158 |
+
st.stop()
|
| 159 |
+
|
| 160 |
+
col_status, col_task = st.columns([1, 2])
|
| 161 |
+
with col_status:
|
| 162 |
+
st.markdown("#### π Node Status")
|
| 163 |
+
with col_task:
|
| 164 |
+
st.markdown("#### π¬ Task")
|
| 165 |
+
st.info(task)
|
| 166 |
+
|
| 167 |
+
with st.spinner("π€ Agent working... (~20-40 seconds)"):
|
| 168 |
+
try:
|
| 169 |
+
result = graph.invoke(initial_state(task), {"recursion_limit": 50})
|
| 170 |
+
success = True
|
| 171 |
+
except Exception as e:
|
| 172 |
+
st.error(f"β Agent error: {e}")
|
| 173 |
+
success = False
|
| 174 |
+
result = {}
|
| 175 |
+
|
| 176 |
+
if success and result:
|
| 177 |
+
final_code = result.get("final_code") or result.get("code", "")
|
| 178 |
+
bms = result.get("benchmark_ms", 0.0)
|
| 179 |
+
conf = result.get("confidence_score", 0)
|
| 180 |
+
hyp = result.get("hypothesis_result", "")
|
| 181 |
+
passed = result.get("passed", False)
|
| 182 |
+
secure = result.get("is_secure", False)
|
| 183 |
+
simple = result.get("is_simple", False)
|
| 184 |
+
refl_ok = result.get("reflection_ok", False)
|
| 185 |
+
retries = result.get("retries", 0)
|
| 186 |
+
|
| 187 |
+
# Node status
|
| 188 |
+
with col_status:
|
| 189 |
+
node_card("π", "Planner", "pass", "blueprint ready")
|
| 190 |
+
node_card("π»", "Coder", "pass", f"{len(final_code.splitlines())} lines")
|
| 191 |
+
node_card("π³", "AST Validator", "pass")
|
| 192 |
+
node_card("π§¬", "Test Generator", "pass", "tests created")
|
| 193 |
+
node_card("π§ͺ", "Tester", "pass" if passed else "skip",
|
| 194 |
+
"passed" if passed else f"retried {retries}x")
|
| 195 |
+
|
| 196 |
+
if "β
" in hyp:
|
| 197 |
+
hyp_status = "pass"
|
| 198 |
+
elif "β οΈ" in hyp:
|
| 199 |
+
hyp_status = "skip"
|
| 200 |
+
else:
|
| 201 |
+
hyp_status = "skip"
|
| 202 |
+
node_card("π²", "Hypothesis", hyp_status, hyp[:35] if hyp else "skipped")
|
| 203 |
+
node_card("β‘", "Benchmarker", "pass" if bms > 0 else "skip",
|
| 204 |
+
f"{bms:.1f}ms" if bms > 0 else "skipped")
|
| 205 |
+
node_card("π", "Security", "pass" if secure else "skip",
|
| 206 |
+
"passed" if secure else "warnings found")
|
| 207 |
+
node_card("π", "Complexity", "pass" if simple else "skip",
|
| 208 |
+
"passed" if simple else "warnings found")
|
| 209 |
+
node_card("πͺ", "Self Reflection","pass" if refl_ok else "skip",
|
| 210 |
+
f"{conf}/10" if conf > 0 else "7/10 default")
|
| 211 |
+
node_card("β¨", "Reviewer", "pass", "polished")
|
| 212 |
+
node_card("π", "Explainer", "pass", "docs written")
|
| 213 |
+
|
| 214 |
+
# Metrics
|
| 215 |
+
st.divider()
|
| 216 |
+
st.markdown("### π Results Summary")
|
| 217 |
+
m1, m2, m3, m4, m5 = st.columns(5)
|
| 218 |
+
m1.metric("π Retries", retries)
|
| 219 |
+
m2.metric("π Secure", "β
" if secure else "β οΈ")
|
| 220 |
+
m3.metric("π Simple", "β
" if simple else "β οΈ")
|
| 221 |
+
m4.metric("πͺ Confidence", f"{conf}/10" if conf > 0 else "7/10")
|
| 222 |
+
m5.metric("β‘ Speed", f"{bms:.1f}ms" if bms > 0 else "Skipped")
|
| 223 |
+
|
| 224 |
+
# Final code
|
| 225 |
+
st.divider()
|
| 226 |
+
st.markdown("### π» Final Code")
|
| 227 |
+
st.code(final_code, language="python")
|
| 228 |
+
|
| 229 |
+
# Expandable sections
|
| 230 |
+
with st.expander("π View Plan"):
|
| 231 |
+
st.markdown(result.get("plan", ""))
|
| 232 |
+
|
| 233 |
+
with st.expander("π§ͺ View Test Output"):
|
| 234 |
+
test_out = result.get("test_result", "")
|
| 235 |
+
st.code(test_out if test_out else "No test output captured", language="text")
|
| 236 |
+
|
| 237 |
+
with st.expander("π² Hypothesis Result"):
|
| 238 |
+
hyp_val = result.get("hypothesis_result", "")
|
| 239 |
+
if "β
" in hyp_val:
|
| 240 |
+
st.success(hyp_val)
|
| 241 |
+
elif "β οΈ" in hyp_val:
|
| 242 |
+
st.warning(hyp_val)
|
| 243 |
+
else:
|
| 244 |
+
st.info("Hypothesis testing was skipped for this run.")
|
| 245 |
+
|
| 246 |
+
with st.expander("πͺ Self Reflection Notes"):
|
| 247 |
+
notes = result.get("reflection_notes", "")
|
| 248 |
+
st.info(notes if notes else "Agent approved code on first reflection.")
|
| 249 |
+
|
| 250 |
+
# Explanation
|
| 251 |
+
st.divider()
|
| 252 |
+
st.markdown("### π Explanation")
|
| 253 |
+
explanation = result.get("explanation", "")
|
| 254 |
+
if explanation:
|
| 255 |
+
st.markdown(explanation)
|
| 256 |
+
else:
|
| 257 |
+
st.info("See the final code above.")
|
| 258 |
+
|
| 259 |
+
st.success("β
Agent completed successfully!")
|
| 260 |
+
|
| 261 |
+
elif run_btn and not task:
|
| 262 |
+
st.warning("β οΈ Please enter a task first!")
|
| 263 |
+
|
| 264 |
+
# ββ FOOTER ββββββββββββββββββββββββββββββββ
|
| 265 |
+
st.divider()
|
| 266 |
+
st.markdown(
|
| 267 |
+
"<center style='color:#555'>Built by Krish Patel Β· "
|
| 268 |
+
"LangGraph + Groq + ChromaDB Β· "
|
| 269 |
+
"<a href='https://github.com' style='color:#00cc88'>GitHub</a></center>",
|
| 270 |
+
unsafe_allow_html=True
|
| 271 |
+
)
|
edges.py
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# edges.py β Conditional edge routing for Autonomous Python Coding Agent
|
| 2 |
+
|
| 3 |
+
from langgraph.graph import END
|
| 4 |
+
from state import State
|
| 5 |
+
|
| 6 |
+
|
| 7 |
+
def route_after_ast(state: State) -> str:
    """Choose the node that follows the AST validator.

    Returns ``"test_generator"`` when ``state["ast_valid"]`` is truthy.
    Otherwise returns ``"__end__"`` once ``state["retries"]`` has reached 3,
    and ``"debugger"`` while retries remain.
    """
    if state["ast_valid"]:
        return "test_generator"
    return "__end__" if state["retries"] >= 3 else "debugger"
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def route_after_test(state: State) -> str:
    """Choose the node that follows the tester.

    Returns ``"hypothesis"`` when the tests passed, or when
    ``state["retries"]`` has reached 3 (the pipeline moves forward instead
    of debugging again). Otherwise returns ``"debugger"``.
    """
    if state["passed"] or state["retries"] >= 3:
        return "hypothesis"
    return "debugger"
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def route_after_hypothesis(state: State) -> str:
    """Always route to ``"benchmark"``; ``state`` is not inspected, so this stage never blocks the pipeline."""
    next_node = "benchmark"
    return next_node
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def route_after_benchmark(state: State) -> str:
    """Choose the node that follows the benchmark.

    Returns ``"debugger"`` when ``state["error"]`` mentions "too slow" or
    "optimize" (case-insensitive) and ``state["retries"]`` is still below 3.
    Returns ``"security"`` in every other case.
    """
    # `error` may be missing or explicitly None; `.get("error", "")` only
    # covers the missing case, so normalise before calling .lower().
    message = (state.get("error") or "").lower()
    needs_optimization = "too slow" in message or "optimize" in message

    # `retries` is only read when a slowdown was reported.
    if needs_optimization and state["retries"] < 3:
        return "debugger"
    return "security"
|
| 34 |
+
|
| 35 |
+
|
| 36 |
+
def route_after_security(state: State) -> str:
    """Choose the node that follows the security audit.

    Returns ``"complexity"`` when the code is marked secure, or when
    ``state["security_retries"]`` has reached 2 (give up and move forward).
    Otherwise returns ``"coder"``.
    """
    if state["is_secure"] or state["security_retries"] >= 2:
        return "complexity"
    return "coder"
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def route_after_complexity(state: State) -> str:
    """Choose the node that follows the complexity check.

    Returns ``"reflection"`` when the code is marked simple, or when
    ``state["complexity_retries"]`` has reached 2 (give up and move
    forward). Otherwise returns ``"coder"``.
    """
    if state["is_simple"] or state["complexity_retries"] >= 2:
        return "reflection"
    return "coder"
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def route_after_reflection(state: State) -> str:
    """Choose the node that follows self-reflection.

    Returns ``"reviewer"`` when reflection approved the code, or when
    ``state["retries"]`` has reached 3 (ship anyway). Otherwise returns
    ``"coder"``.
    """
    # NOTE(review): this reads the shared `retries` counter, not a
    # reflection-specific one — confirm that is intended.
    if state["reflection_ok"] or state["retries"] >= 3:
        return "reviewer"
    return "coder"
|
main.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# main.py β Graph builder for Autonomous Python Coding Agent
|
| 2 |
+
|
| 3 |
+
import os
|
| 4 |
+
# SECURITY: the Groq key must come from the environment (shell export or a
# Hugging Face Spaces secret). A real key used to be hard-coded here as the
# fallback value; it has been published with this file and must be revoked.
if not os.environ.get("GROQ_API_KEY"):
    import warnings

    warnings.warn(
        "GROQ_API_KEY is not set; export it or add it as a Space secret before running the agent.",
        RuntimeWarning,
        stacklevel=2,
    )
|
| 5 |
+
|
| 6 |
+
from langgraph.graph import StateGraph, END
|
| 7 |
+
|
| 8 |
+
from state import State
|
| 9 |
+
from nodes import (
|
| 10 |
+
planner, coder, ast_validator, test_generator,
|
| 11 |
+
tester, hypothesis_tester, performance_benchmarker,
|
| 12 |
+
debugger, security_auditor, complexity_judge,
|
| 13 |
+
self_reflection, reviewer, explainer
|
| 14 |
+
)
|
| 15 |
+
from edges import (
|
| 16 |
+
route_after_ast, route_after_test, route_after_hypothesis,
|
| 17 |
+
route_after_benchmark, route_after_security,
|
| 18 |
+
route_after_complexity, route_after_reflection
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
# ββ BUILD GRAPH βββββββββββββββββββββββββββ
|
| 22 |
+
def build_graph():
    """Wire every node and edge of the coding agent and compile the graph.

    Registers the 13 node callables on a ``StateGraph(State)``, makes
    "planner" the entry point, adds the unconditional edges, then the
    router-driven conditional edges, and returns ``compile()``'s result.
    """
    workflow = StateGraph(State)

    # (registered node name, callable) for all 13 nodes.
    node_table = (
        ("planner", planner),
        ("coder", coder),
        ("ast_validator", ast_validator),
        ("test_generator", test_generator),
        ("tester", tester),
        ("hypothesis", hypothesis_tester),
        ("benchmark", performance_benchmarker),
        ("debugger", debugger),
        ("security", security_auditor),
        ("complexity", complexity_judge),
        ("reflection", self_reflection),
        ("reviewer", reviewer),
        ("explainer", explainer),
    )
    for node_name, node_fn in node_table:
        workflow.add_node(node_name, node_fn)

    workflow.set_entry_point("planner")

    # Unconditional transitions.
    fixed_edges = (
        ("planner", "coder"),
        ("coder", "ast_validator"),
        ("test_generator", "tester"),
        ("debugger", "ast_validator"),
        ("reviewer", "explainer"),
        ("explainer", END),
    )
    for source, target in fixed_edges:
        workflow.add_edge(source, target)

    # (source node, router function, {router return value: target node}).
    branches = (
        ("ast_validator", route_after_ast,
         {"test_generator": "test_generator", "debugger": "debugger", "__end__": END}),
        ("tester", route_after_test,
         {"hypothesis": "hypothesis", "debugger": "debugger"}),
        ("hypothesis", route_after_hypothesis,
         {"benchmark": "benchmark"}),
        ("benchmark", route_after_benchmark,
         {"security": "security", "debugger": "debugger"}),
        ("security", route_after_security,
         {"complexity": "complexity", "coder": "coder"}),
        ("complexity", route_after_complexity,
         {"reflection": "reflection", "coder": "coder"}),
        ("reflection", route_after_reflection,
         {"reviewer": "reviewer", "coder": "coder"}),
    )
    for source, router, targets in branches:
        workflow.add_conditional_edges(source, router, targets)

    return workflow.compile()
|
| 68 |
+
|
| 69 |
+
# ── COMPILED GRAPH ────────────────────────
# Built once when this module is imported; the script section below calls
# graph.invoke(...) on this object.
graph = build_graph()
|
| 71 |
+
|
| 72 |
+
# ββ INITIAL STATE βββββββββββββββββββββββββ
|
| 73 |
+
def get_initial_state(task: str) -> dict:
    """Return a fresh state dict for one run of the graph.

    Only ``task`` is caller-supplied. Text fields start as "", counters and
    the confidence score as 0, flags as False, and ``benchmark_ms`` as 0.0.
    """
    text_fields = (
        "plan", "code", "test_result", "error", "fixed_code", "explanation",
        "review", "final_code", "generated_tests", "hypothesis_result",
        "reflection_notes",
    )
    counter_fields = (
        "retries", "security_retries", "complexity_retries", "confidence_score",
    )
    flag_fields = ("passed", "is_secure", "is_simple", "ast_valid", "reflection_ok")

    initial = {"task": task}
    initial.update(dict.fromkeys(text_fields, ""))
    initial.update(dict.fromkeys(counter_fields, 0))
    initial.update(dict.fromkeys(flag_fields, False))
    initial["benchmark_ms"] = 0.0
    return initial
|
| 98 |
+
|
| 99 |
+
# ββ RUN βββββββββββββββββββββββββββββββββββ
|
| 100 |
+
if __name__ == "__main__":
    # Demo driver: run each sample task through the compiled graph and print
    # a summary of the resulting state.
    banner = "=" * 60
    demo_tasks = (
        "Write a Python function to find all prime numbers up to n",
        "Write a Python function to check if a string is a palindrome",
    )

    for task in demo_tasks:
        print(f"\n{banner}")
        print(f"π Task: {task}")
        print(banner)

        result = graph.invoke(get_initial_state(task), {"recursion_limit": 50})

        print(f"\n{banner}")
        # Prefer the reviewer's polished code; fall back to the raw code.
        shown_code = result['final_code'] or result['code']
        print(f"π» Final Code:\n{shown_code}")
        print(f"\nπ Explanation:\n{result['explanation']}")
        bms = result['benchmark_ms']
        conf = result['confidence_score']
        print(f"\nπ§ͺ Tests: {result['test_result'][:100]}")
        print(f"π² Hypothesis: {result['hypothesis_result']}")
        if bms > 0:
            print(f"β‘ Speed: {bms:.1f}ms")
        else:
            print("β‘ Speed: Skipped")
        if conf > 0:
            print(f"πͺ Confidence: {conf}/10")
        else:
            print("πͺ Confidence: 7/10 (default)")
        print(f"π Secure: {result['is_secure']}")
        print(f"π Simple: {result['is_simple']}")
        print(f"π Retries: {result['retries']}")
|
nodes.py
ADDED
|
@@ -0,0 +1,532 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# nodes.py — All 13 nodes for Autonomous Python Coding Agent
|
| 2 |
+
|
| 3 |
+
import ast
import hashlib
import importlib.util
import os
import re
import subprocess
import sys
|
| 9 |
+
|
| 10 |
+
from langchain_groq import ChatGroq
|
| 11 |
+
from langchain_core.messages import HumanMessage, SystemMessage
|
| 12 |
+
import chromadb
|
| 13 |
+
|
| 14 |
+
from state import State
|
| 15 |
+
|
| 16 |
+
# ββ LLM ββββββββββββββββββββββββββββββββββ
|
| 17 |
+
# Chat model shared by every LLM-backed node in this module; created once at
# import time with temperature=0.
llm = ChatGroq(model="llama-3.1-8b-instant", temperature=0)

# ── CHROMADB ─────────────────────────────
# Client created with default arguments.
# NOTE(review): presumably an in-memory store, so saved fixes would not
# survive a restart — confirm against the chromadb documentation.
chroma_client = chromadb.Client()
# Collection of "BUG: ... FIX: ..." documents: the debugger node adds to it
# and the coder node queries it.
memory_collection = chroma_client.get_or_create_collection("bug_fixes")
|
| 22 |
+
|
| 23 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 24 |
+
# NODE 1 β PLANNER
|
| 25 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 26 |
+
def planner(state: State):
    """Ask the LLM to break ``state['task']`` into a step-by-step plan.

    Returns a state update ``{"plan": <LLM reply text>}``.
    """
    print("\nπ Planner thinking...")

    system_msg = SystemMessage(content="You are a coding planner. Break tasks into clear steps.")
    user_msg = HumanMessage(content=f"""
Break this coding task into clear steps:
Task: {state['task']}

Reply with:
1. What the function should do
2. Input and output format
3. Edge cases to handle
4. Test cases to verify
""")
    plan_reply = llm.invoke([system_msg, user_msg])

    print("Plan ready")
    return {"plan": plan_reply.content}
|
| 43 |
+
|
| 44 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 45 |
+
# NODE 2 β CODER
|
| 46 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 47 |
+
def coder(state: State):
    """Generate Python code for the task with the LLM.

    When ``state['error']`` is non-empty, the bug-fix memory collection is
    queried (best effort) for up to two similar past fixes, which are added
    to the prompt together with the plan, the error and any reflection notes.

    Returns a state update with the new ``code`` (markdown fences stripped)
    and with ``error``, ``fixed_code`` and ``reflection_notes`` reset to "".
    """
    print("\nπ» Coder writing code...")

    past_fixes = ""
    if state["error"]:
        # Best effort: a memory failure must never block code generation,
        # but it is reported instead of being swallowed silently.
        try:
            hits = memory_collection.query(query_texts=[state["error"]], n_results=2)
            documents = hits["documents"][0] if hits["documents"] else []
            if documents:
                past_fixes = "\n".join(documents)
                print("π§ Found past fixes in memory!")
        except Exception as exc:
            print(f"Memory lookup skipped: {exc}")

    response = llm.invoke([
        SystemMessage(content="""You are an expert Python developer.
Write clean working Python code WITH type hints on every function.
Return ONLY the code β no explanation, no markdown, no backticks."""),
        HumanMessage(content=f"""
Task: {state['task']}

Plan to follow:
{state['plan']}

Previous error (fix this):
{state['error'] if state['error'] else 'No errors yet β write fresh code'}

Reflection notes:
{state.get('reflection_notes', '') or 'None'}

Past fixes from memory:
{past_fixes if past_fixes else 'No past fixes available'}

Rules:
- Type hints on ALL functions
- Docstring on every function
- Keep it simple and readable
- MUST include demo calls inside: if __name__ == '__main__': that print results

Write complete working Python code only:
""")
    ])

    # The model sometimes wraps its answer in markdown fences despite the
    # instructions; strip them before the code is parsed or executed.
    code = response.content
    code = re.sub(r"```python", "", code)
    code = re.sub(r"```", "", code)
    code = code.strip()

    print(f"Code written ({len(code.splitlines())} lines)")
    return {"code": code, "error": "", "fixed_code": "", "reflection_notes": ""}
|
| 96 |
+
|
| 97 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 98 |
+
# NODE 3 β AST VALIDATOR
|
| 99 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 100 |
+
def ast_validator(state: State):
    """Check that the current code parses, and warn about suspicious details.

    The code checked is ``state['fixed_code']`` when non-empty, otherwise
    ``state['code']``.

    Returns ``{"ast_valid": True}`` when the code parses. On a parse failure
    returns ``{"ast_valid": False, "error": <message>}``. Unresolvable
    imports and functions without a return annotation only print warnings;
    they never fail validation.
    """
    print("\nπ³ AST Validator checking syntax...")
    code = state["fixed_code"] if state["fixed_code"] else state["code"]

    try:
        tree = ast.parse(code)
    except SyntaxError as e:
        print(f"β Syntax error: {e}")
        return {"ast_valid": False, "error": f"SyntaxError at line {e.lineno}: {e.msg}"}
    except ValueError as e:
        # Older Python versions raise ValueError (not SyntaxError) for source
        # that contains null bytes.
        print(f"β Syntax error: {e}")
        return {"ast_valid": False, "error": f"Invalid source: {e}"}

    # Collect absolute top-level module names once. Relative imports
    # (level > 0) cannot be resolved from here and are skipped.
    imported = set()
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            imported.update(alias.name.split(".")[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.module and node.level == 0:
            imported.add(node.module.split(".")[0])

    for base in sorted(imported):
        try:
            found = importlib.util.find_spec(base) is not None
        except ValueError:
            # find_spec raises ValueError for an already-imported module
            # without a usable __spec__ (e.g. __main__); the module exists.
            found = True
        except ImportError:
            found = False
        if not found:
            print(f"β οΈ Possibly hallucinated import: {base}")

    missing = [
        n.name for n in ast.walk(tree)
        if isinstance(n, (ast.FunctionDef, ast.AsyncFunctionDef))
        and not n.returns and n.name != "__init__"
    ]
    if missing:
        print(f"β οΈ Missing return hints: {missing}")

    print("✅ AST passed!")
    return {"ast_valid": True}
|
| 129 |
+
|
| 130 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 131 |
+
# NODE 4 β TEST GENERATOR
|
| 132 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 133 |
+
def test_generator(state: State):
    """Ask the LLM for a standalone, assert-based test script for the code.

    Uses ``state['fixed_code']`` when non-empty, otherwise ``state['code']``.
    Returns ``{"generated_tests": <script with markdown fences stripped>}``.
    """
    print("\n𧬠Test Generator creating tests...")
    code = state["fixed_code"] or state["code"]

    system_msg = SystemMessage(content="""You are a Python testing expert.
Return ONLY runnable Python test code β no markdown, no backticks.""")
    user_msg = HumanMessage(content=f"""
Generate test cases for this code:

TASK: {state['task']}
CODE:
{code}

Rules:
- Copy ALL function definitions inline β do NOT import from files
- Cover: normal cases, edge cases, large input
- Call each test function at the bottom to run them
- Do NOT use unittest or sys β just plain assert statements
- Print "All tests passed!" at the end if successful

Return ONLY runnable Python code:
""")
    reply = llm.invoke([system_msg, user_msg])

    # Drop any markdown fences the model added anyway.
    tests = reply.content.replace("```python", "").replace("```", "").strip()

    test_count = tests.count('def test_')
    print(f"Generated {test_count} test functions")
    return {"generated_tests": tests}
|
| 165 |
+
|
| 166 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 167 |
+
# NODE 5 β TESTER
|
| 168 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 169 |
+
def tester(state: State):
|
| 170 |
+
print("\nπ§ͺ Tester running code...")
|
| 171 |
+
code = state["fixed_code"] if state["fixed_code"] else state["code"]
|
| 172 |
+
|
| 173 |
+
try:
|
| 174 |
+
result = subprocess.run(
|
| 175 |
+
["python", "-c", code],
|
| 176 |
+
capture_output=True, text=True, timeout=10
|
| 177 |
+
)
|
| 178 |
+
|
| 179 |
+
if result.returncode == 0:
|
| 180 |
+
if not result.stdout.strip():
|
| 181 |
+
print("β No output produced")
|
| 182 |
+
return {
|
| 183 |
+
"test_result": "",
|
| 184 |
+
"error": "Code ran but produced no output. Add print statements in if __name__ == '__main__'.",
|
| 185 |
+
"passed": False
|
| 186 |
+
}
|
| 187 |
+
print("β
Code passed!")
|
| 188 |
+
|
| 189 |
+
test_output = ""
|
| 190 |
+
if state.get("generated_tests"):
|
| 191 |
+
try:
|
| 192 |
+
test_run = subprocess.run(
|
| 193 |
+
["python", "-c", state["generated_tests"]],
|
| 194 |
+
capture_output=True, text=True, timeout=15
|
| 195 |
+
)
|
| 196 |
+
if test_run.returncode == 0:
|
| 197 |
+
test_output = "β
All generated tests passed\n" + test_run.stdout
|
| 198 |
+
else:
|
| 199 |
+
test_output = f"β οΈ Some tests failed:\n{test_run.stderr[:200]}"
|
| 200 |
+
except Exception as e:
|
| 201 |
+
test_output = f"Test run error: {e}"
|
| 202 |
+
|
| 203 |
+
return {
|
| 204 |
+
"test_result": result.stdout + "\n" + test_output,
|
| 205 |
+
"error": "",
|
| 206 |
+
"passed": True,
|
| 207 |
+
"fixed_code": ""
|
| 208 |
+
}
|
| 209 |
+
else:
|
| 210 |
+
print(f"β Failed: {result.stderr[:80]}")
|
| 211 |
+
return {"test_result": "", "error": result.stderr, "passed": False}
|
| 212 |
+
|
| 213 |
+
except subprocess.TimeoutExpired:
|
| 214 |
+
return {"test_result": "", "error": "Timed out after 10 seconds", "passed": False}
|
| 215 |
+
except Exception as e:
|
| 216 |
+
return {"test_result": "", "error": str(e), "passed": False}
|
| 217 |
+
|
| 218 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 219 |
+
# NODE 6 β HYPOTHESIS TESTER
|
| 220 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 221 |
+
def hypothesis_tester(state: State):
    """Generate and run Hypothesis property tests for the current code.

    The LLM writes a standalone Hypothesis script for ``state['fixed_code']``
    (or ``state['code']`` when that is empty). The script runs in a
    subprocess with the interpreter running this process — so the installed
    ``hypothesis`` package is the one it sees — under a 30 second timeout.

    Never raises. Returns ``{"hypothesis_result": <summary>}`` describing a
    pass, a failing run (first 200 characters of stderr), a timeout, or any
    other error.
    """
    print("\nπ² Hypothesis property-based testing...")
    code = state["fixed_code"] if state["fixed_code"] else state["code"]
    hypothesis_result = "Skipped"

    try:
        response = llm.invoke([
            SystemMessage(content="""You are a Hypothesis testing expert.
Return ONLY runnable Python code β no markdown, no backticks."""),
            HumanMessage(content=f"""
Write Hypothesis property tests for this code:
TASK: {state['task']}
CODE:
{code}

Rules:
- Copy function definitions inline
- Use: from hypothesis import given, settings, strategies as st
- DO NOT use unittest or sys anywhere
- Call test functions directly at the bottom
- Keep to 2 simple property tests only

Return ONLY complete runnable Python code:
""")
        ])

        hyp_code = response.content
        hyp_code = re.sub(r"```python", "", hyp_code)
        hyp_code = re.sub(r"```", "", hyp_code)
        hyp_code = hyp_code.strip()

        result = subprocess.run(
            # "python" on PATH may be a different interpreter without
            # hypothesis installed; use the running one.
            [sys.executable or "python", "-c", hyp_code],
            capture_output=True, text=True, timeout=30
        )

        if result.returncode == 0:
            print("✅ Hypothesis passed!")
            hypothesis_result = "✅ Property-based tests passed with random inputs"
        else:
            err = result.stderr[:200]
            print(f"β οΈ Hypothesis edge case: {err[:80]}")
            hypothesis_result = f"β οΈ Edge case found: {err}"

    except subprocess.TimeoutExpired:
        hypothesis_result = "οΏ½οΏ½οΈ Timed out β possible infinite loop on edge input"
    except Exception as e:
        # Deliberately broad: this node is advisory and must not stop the graph.
        hypothesis_result = f"β οΈ Error: {str(e)[:100]}"

    return {"hypothesis_result": hypothesis_result}
|
| 271 |
+
|
| 272 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 273 |
+
# NODE 7 β PERFORMANCE BENCHMARKER
|
| 274 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 275 |
+
def performance_benchmarker(state: State):
    """Time the first public function of the current code in a subprocess.

    A driver script is appended to the code (``state['fixed_code']`` or, when
    empty, ``state['code']``). The driver picks the first function whose name
    does not start with "_" and tries a few generic one-argument calls until
    one runs; that call is timed with ``timeit`` over 1000 iterations.

    Returns ``{"benchmark_ms": <total ms for 1000 calls>}``. When the total
    exceeds 5000 ms, ``error`` and ``passed=False`` are added as well. When
    nothing could be timed or the subprocess fails, returns
    ``{"benchmark_ms": 0.0}``.
    """
    print("\nβ‘ Benchmarking performance...")
    code = state["fixed_code"] if state["fixed_code"] else state["code"]

    benchmark_code = (
        code + "\n\n"
        "import timeit as _t, ast as _a\n"
        # repr() yields a valid literal for any source text; deleting quotes
        # and wrapping in ''' corrupted code containing quotes or backslashes.
        "_tree = _a.parse(" + repr(code) + ")\n"
        "_fns = [n.name for n in _a.walk(_tree) "
        "if isinstance(n, _a.FunctionDef) and not n.name.startswith('_')]\n"
        "if _fns:\n"
        "    _f = _fns[0]\n"
        "    _ran = False\n"
        "    for _call in [_f+'(100)', _f+'(\"hello\")', _f+'([1,2,3,4,5])', _f+'(\"racecar\")', _f+'(10)']:\n"
        "        try:\n"
        "            _ms = _t.timeit(_call, globals=globals(), number=1000)*1000\n"
        "            print('BENCHMARK:'+str(round(_ms,2))+'ms')\n"
        "            _ran = True\n"
        "            break\n"
        # Exception, not a bare except, so Ctrl-C / SystemExit still propagate.
        "        except Exception: continue\n"
        "    if not _ran: print('BENCHMARK:skipped')\n"
        "else: print('BENCHMARK:skipped')\n"
    )

    try:
        result = subprocess.run(
            [sys.executable or "python", "-c", benchmark_code],
            capture_output=True, text=True, timeout=20
        )
        output = result.stdout + result.stderr
        match = re.search(r"BENCHMARK:([\d.]+)ms", output)
        if match:
            ms = float(match.group(1))
            print(f"β‘ {ms:.2f}ms per 1000 runs")
            if ms > 5000:
                return {
                    "benchmark_ms": ms,
                    "error": f"Too slow: {ms:.0f}ms. Optimize algorithm.",
                    "passed": False
                }
            return {"benchmark_ms": ms}
        return {"benchmark_ms": 0.0}
    except Exception as e:
        # Includes subprocess.TimeoutExpired: the benchmark is advisory.
        print(f"β οΈ Benchmark error: {e}")
        return {"benchmark_ms": 0.0}
|
| 321 |
+
|
| 322 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 323 |
+
# NODE 8 β DEBUGGER
|
| 324 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 325 |
+
def debugger(state: State):
    """Ask the LLM to repair the code that produced ``state['error']``.

    The code sent for repair is the latest version: ``state['fixed_code']``
    when a previous fix exists, otherwise ``state['code']``. Other nodes
    select the code they run in the same way, so the error being fixed
    belongs to this version.

    The bug/fix pair is stored in the memory collection (best effort).
    Returns ``{"fixed_code": <repaired code>, "retries": retries + 1}``.
    """
    print(f"\nπ§ Debugger fixing (attempt {state['retries']+1})...")

    failing_code = state.get("fixed_code") or state["code"]

    response = llm.invoke([
        SystemMessage(content="""You are a Python debugger.
Fix the exact error. Return ONLY fixed code β no markdown, no backticks."""),
        HumanMessage(content=f"""
CODE:
{failing_code}

ERROR:
{state['error']}

Return complete fixed Python code only:
""")
    ])

    fixed = response.content
    fixed = re.sub(r"```python", "", fixed)
    fixed = re.sub(r"```", "", fixed)
    fixed = fixed.strip()

    try:
        # md5 only derives a short, stable document id from the error text;
        # it is not used for anything security-related.
        stable_id = hashlib.md5(state["error"].encode()).hexdigest()[:8]
        memory_collection.add(
            documents=[f"BUG: {state['error']}\nFIX: {fixed}"],
            ids=[f"fix_{state['retries']}_{stable_id}"]
        )
        print("π§ Stored in memory!")
    except Exception as exc:
        # Memory is optional; report the failure but keep the fix.
        print(f"Memory store skipped: {exc}")

    return {"fixed_code": fixed, "retries": state["retries"] + 1}
|
| 358 |
+
|
| 359 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 360 |
+
# NODE 9 β SECURITY AUDITOR
|
| 361 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 362 |
+
def security_auditor(state: State):
    """Scan the code for a fixed list of risky patterns.

    The code scanned is ``state['final_code']`` when non-empty, otherwise
    ``state['code']``. Matching is case-insensitive. ``eval(`` and ``exec(``
    only match when not preceded by an identifier character, so helpers such
    as ``ast.literal_eval(`` are not reported as ``eval``.

    Returns ``{"is_secure": True}`` when nothing matches. Otherwise returns
    ``is_secure`` False, an ``error`` listing the reasons, and
    ``security_retries`` incremented by one.
    """
    print("\nπ Security check...")
    code = state["final_code"] if state["final_code"] else state["code"]
    lowered = code.lower()  # lower-case once, not once per pattern

    # (regex over the lower-cased code, reason reported on a match)
    dangerous = [
        (r"(?<!\w)eval\(", "Code execution via eval"),
        (r"(?<!\w)exec\(", "Code execution via exec"),
        (r"os\.system\(", "Shell injection risk"),
        (r"__import__\(", "Dynamic import risk"),
        (r"pickle\.loads\(", "Deserialization attack"),
        (r"password =", "Hardcoded credential"),
        (r"api_key =", "Hardcoded API key"),
    ]

    found = [reason for pattern, reason in dangerous if re.search(pattern, lowered)]

    if found:
        print(f"β Security issues: {found}")
        return {
            "is_secure": False,
            "error": f"Security issues: {found}",
            "security_retries": state["security_retries"] + 1
        }

    print("✅ Security passed!")
    return {"is_secure": True}
|
| 388 |
+
|
| 389 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 390 |
+
# NODE 10 β COMPLEXITY JUDGE
|
| 391 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 392 |
+
def complexity_judge(state: State):
    """Judge whether the code is simple enough, by heuristics plus an LLM score.

    The code judged is ``state['final_code']`` when non-empty, otherwise
    ``state['code']``. Heuristic issues are more than 60 lines and a deepest
    indentation above 16 characters. The LLM is asked for a 1-10 score; any
    failure to obtain one falls back to 5.

    Returns ``{"is_simple": True}`` when the score is at most 7 and there are
    no heuristic issues. Otherwise returns ``is_simple`` False, an ``error``
    message, and ``complexity_retries`` incremented by one.
    """
    print("\nπ Complexity check...")
    code = state["final_code"] or state["code"]
    lines = code.split("\n")

    issues = []
    if len(lines) > 60:
        issues.append(f"Too long: {len(lines)} lines")

    # Widest leading whitespace among non-blank lines (0 when there are none).
    indent_widths = [len(l) - len(l.lstrip()) for l in lines if l.strip()]
    if max(indent_widths, default=0) > 16:
        issues.append("Too deeply nested")

    try:
        prompt = HumanMessage(f"Rate complexity 1-10:\n{code}\nReply ONLY a number 1-10.")
        reply = llm.invoke([prompt])
        score = int(re.search(r'\d+', reply.content.strip()).group())
    except Exception:
        # No usable number from the model: assume a mid-range score.
        score = 5

    print(f"Complexity: {score}/10")

    if not (score > 7 or issues):
        print("✅ Complexity passed!")
        return {"is_simple": True}

    print(f"β Too complex: {issues}")
    return {
        "is_simple": False,
        "error": f"Too complex (score {score}/10). Simplify.",
        "complexity_retries": state["complexity_retries"] + 1
    }
|
| 427 |
+
|
| 428 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 429 |
+
# NODE 11 β SELF REFLECTION
|
| 430 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 431 |
+
def self_reflection(state: State):
    """Have the LLM review the code and parse its structured verdict.

    The code reviewed is ``state['final_code']`` when non-empty, otherwise
    ``state['code']``. The reply is read as ``KEY: value`` lines
    (CONFIDENCE, APPROVED, ISSUES, NOTES). Missing or unparsable fields
    default to confidence 7, approved, no issues, notes "Looks good".

    The review fails when the model did not approve, or when it listed
    issues and confidence is below 7. Both outcomes return ``reflection_ok``,
    ``reflection_notes`` and ``confidence_score``; a failure also sets
    ``error``.
    """
    print("\nπͺ Self Reflection...")
    code = state["final_code"] or state["code"]

    system_msg = SystemMessage(content="""You are a senior Python engineer.
Reply in EXACTLY this format:
CONFIDENCE: <1-10>
APPROVED: <YES or NO>
ISSUES: <list or NONE>
NOTES: <one sentence>""")
    user_msg = HumanMessage(content=f"Review this code:\nTASK: {state['task']}\nCODE:\n{code}")
    reply = llm.invoke([system_msg, user_msg])

    # Map upper-cased field name -> value; a repeated key keeps its last value.
    fields = {}
    for raw_line in reply.content.strip().splitlines():
        if ":" not in raw_line:
            continue
        name, _, value = raw_line.partition(":")
        fields[name.strip().upper()] = value.strip()

    digits = re.search(r'\d+', fields.get("CONFIDENCE", "7"))
    confidence = int(digits.group()) if digits else 7

    approved = "YES" in fields.get("APPROVED", "YES").upper()

    issues_text = fields.get("ISSUES", "NONE")
    notes = fields.get("NOTES", "Looks good")
    has_issues = issues_text.upper() not in ("NONE", "") and bool(issues_text.strip())

    if approved and not (has_issues and confidence < 7):
        print(f"✅ Reflection approved ({confidence}/10)")
        return {
            "reflection_ok": True,
            "reflection_notes": notes,
            "confidence_score": confidence
        }

    print(f"β Reflection: confidence {confidence}/10")
    return {
        "reflection_ok": False,
        "reflection_notes": f"Issues: {issues_text}. {notes}",
        "confidence_score": confidence,
        "error": f"Reflection failed ({confidence}/10): {issues_text}"
    }
|
| 481 |
+
|
| 482 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 483 |
+
# NODE 12 β REVIEWER
|
| 484 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 485 |
+
def reviewer(state: State):
    """Have the LLM polish the code and explain it.

    The code sent is ``state['fixed_code']`` when non-empty, otherwise
    ``state['code']``. The reply is expected to contain a ``FINAL_CODE:``
    section followed by an ``EXPLANATION:`` section. When either marker is
    missing, the input code is kept and the whole reply becomes the
    explanation.

    Returns a state update with ``final_code``, ``explanation`` and a fixed
    ``review`` note.
    """
    print("\n⨠Reviewer polishing + explaining...")
    code = state["fixed_code"] if state["fixed_code"] else state["code"]

    response = llm.invoke([
        SystemMessage(content="""You are a senior Python developer and teacher.
Do TWO things and return in EXACTLY this format:

FINAL_CODE:
<complete polished code with docstrings and type hints>

EXPLANATION:
<simple explanation covering: what it does, how it works, time complexity, example usage>
"""),
        HumanMessage(content=f"Polish this code and explain it:\n{code}")
    ])

    content = response.content
    final_code = ""
    explanation = ""

    if "FINAL_CODE:" in content and "EXPLANATION:" in content:
        # Split on the FIRST marker only, so an explanation that mentions
        # "EXPLANATION:" again is not truncated.
        code_part, _, explanation_part = content.partition("EXPLANATION:")
        code_part = code_part.replace("FINAL_CODE:", "").strip()
        code_part = re.sub(r"```python", "", code_part)
        code_part = re.sub(r"```", "", code_part)
        final_code = code_part.strip()
        explanation = explanation_part.strip()
    else:
        final_code = code
        explanation = content.strip()

    if not final_code:
        # The model returned the markers but no code: keep the input code.
        final_code = code

    if not explanation:
        explanation = "Code completed successfully. See final code above."

    return {
        "final_code": final_code,
        "explanation": explanation,
        "review": "Polished and explained"
    }
|
| 525 |
+
|
| 526 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 527 |
+
# NODE 13 β EXPLAINER (passthrough)
|
| 528 |
+
# βββββββββββββββββββββββββββββββββββββββββ
|
| 529 |
+
def explainer(state: State):
    """Pass-through node that guarantees a non-empty explanation.

    Returns an empty update when ``state`` already has a truthy
    ``explanation``; otherwise returns a default explanation text.
    """
    if state.get("explanation"):
        return {}
    return {"explanation": "Code completed successfully. See final code above."}
|
requirements.txt
CHANGED
|
@@ -1,3 +1,6 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
streamlit==1.35.0
|
| 2 |
+
langgraph==0.2.0
|
| 3 |
+
langchain-groq==0.1.6
|
| 4 |
+
langchain-core==0.2.0
|
| 5 |
+
chromadb==0.5.0
|
| 6 |
+
hypothesis==6.100.0
|
state.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# state.py — Shared State for Autonomous Python Coding Agent
|
| 2 |
+
|
| 3 |
+
from typing import TypedDict
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
class State(TypedDict):
    """Shared state dictionary passed between the nodes of the agent graph."""

    # Task and text artifacts
    task: str                 # task description supplied by the caller
    plan: str                 # planner output
    code: str                 # code written by the coder
    fixed_code: str           # debugger output; "" when there is no pending fix
    final_code: str           # polished code from the reviewer
    generated_tests: str      # test script from the test generator
    explanation: str
    review: str

    # Results and diagnostics
    test_result: str
    error: str                # latest failure message; "" when there is none
    hypothesis_result: str
    benchmark_ms: float       # 0.0 when no benchmark timing was obtained
    reflection_notes: str
    confidence_score: int     # 1-10 value parsed from the self-reflection reply

    # Retry counters
    retries: int              # incremented by the debugger
    security_retries: int     # incremented on a failed security audit
    complexity_retries: int   # incremented on a failed complexity check

    # Pass/fail flags set by the individual checks
    passed: bool
    is_secure: bool
    is_simple: bool
    ast_valid: bool
    reflection_ok: bool
|