Keeby-smilyai committed on
Commit
6bdb2fa
·
verified ·
1 Parent(s): 9193b16

Update backend.py

Browse files
Files changed (1) hide show
  1. backend.py +242 -519
backend.py CHANGED
@@ -1,169 +1,116 @@
1
- # backend.py — FINAL VERSION PHASE-3 MINI INSTRUCT + CACHING + LOGGING
2
  import sqlite3
3
  import os
4
  import json
5
- import uuid
6
- import zipfile
7
- import tempfile
8
- import subprocess
9
  import re
10
  import concurrent.futures
11
- from typing import List, Dict, Any
12
- from transformers import AutoTokenizer, AutoModelForCausalLM
 
 
 
13
  import torch
14
  import psutil
 
15
 
 
16
  DB_PATH = "code_agents_pro.db"
17
  PROJECT_ROOT = "./projects"
18
  os.makedirs(PROJECT_ROOT, exist_ok=True)
19
 
20
- # ------------------------------ DATABASE ------------------------------
 
21
def init_db():
    """Create the ``users`` and ``projects`` tables and the status index.

    Every statement uses ``IF NOT EXISTS`` so the function can be called on
    every start-up against an already initialised database file.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.executescript("""
            CREATE TABLE IF NOT EXISTS users (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                username TEXT UNIQUE NOT NULL,
                password_hash TEXT NOT NULL,
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP
            );
            CREATE TABLE IF NOT EXISTS projects (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                user_id INTEGER NOT NULL,
                title TEXT,
                description TEXT,
                status TEXT DEFAULT 'queued',
                zip_path TEXT,
                logs TEXT DEFAULT '',
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (user_id) REFERENCES users(id)
            );
            CREATE INDEX IF NOT EXISTS idx_user_status ON projects(user_id, status);
        """)
        conn.commit()
    finally:
        # Close even when the script fails so the file handle is released.
        conn.close()
46
 
47
  init_db()
48
 
 
 
49
def hash_password(password):
    """Return the hex-encoded SHA-256 digest of ``password``."""
    import hashlib

    digest = hashlib.sha256()
    digest.update(password.encode())
    return digest.hexdigest()
52
 
53
def verify_password(password, stored_hash):
    """Return True when ``password`` hashes to ``stored_hash``.

    The digests are compared with ``hmac.compare_digest`` so the comparison
    time does not depend on how many leading characters match. A
    ``stored_hash`` that is not a string never matches.
    """
    import hmac

    if not isinstance(stored_hash, str):
        return False
    candidate = hash_password(password)
    # Compare as bytes: compare_digest rejects non-ASCII str arguments.
    return hmac.compare_digest(candidate.encode(), stored_hash.encode())
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
def create_user(username, password):
    """Insert a user row and return its id, or None on an integrity violation."""
    db = sqlite3.connect(DB_PATH)
    cur = db.cursor()
    try:
        cur.execute(
            "INSERT INTO users (username, password_hash) VALUES (?, ?)",
            (username, hash_password(password)),
        )
        new_id = cur.lastrowid
        db.commit()
        db.close()
    except sqlite3.IntegrityError:
        db.close()
        return None
    return new_id
69
 
70
def get_user_by_username(username):
    """Return the ``(id, password_hash)`` row for ``username``, or None."""
    db = sqlite3.connect(DB_PATH)
    cur = db.cursor()
    cur.execute("SELECT id, password_hash FROM users WHERE username = ?", (username,))
    match = cur.fetchone()
    db.close()
    if not match:
        return None
    return match
77
 
78
def get_user_projects(user_id, limit=3):
    """Return up to ``limit`` project rows for ``user_id``, newest first."""
    sql = """
SELECT id, title, description, status, zip_path, created_at, logs
FROM projects WHERE user_id = ? ORDER BY created_at DESC LIMIT ?
"""
    db = sqlite3.connect(DB_PATH)
    cur = db.cursor()
    cur.execute(sql, (user_id, limit))
    rows = cur.fetchall()
    db.close()
    return rows
88
 
89
def create_project(user_id, title, description):
    """Insert a project in the 'queued' state with empty logs and return its id."""
    sql = """
INSERT INTO projects (user_id, title, description, status, logs)
VALUES (?, ?, ?, 'queued', '')
"""
    db = sqlite3.connect(DB_PATH)
    cur = db.cursor()
    cur.execute(sql, (user_id, title, description))
    new_id = cur.lastrowid
    db.commit()
    db.close()
    return new_id
100
 
101
def update_project_status(project_id, status, logs=None, zip_path=None):
    """Update a project's status and, optionally, its logs and zip path.

    Args:
        project_id: id of the row in ``projects`` to update.
        status: new status value.
        logs: replacement log text; when None the stored logs are kept
            (previously they were overwritten with NULL).
        zip_path: archive path; only written when truthy.
    """
    # Column names are fixed literals, so building the SET clause is safe.
    assignments = ["status = ?"]
    params = [status]
    if logs is not None:
        assignments.append("logs = ?")
        params.append(logs)
    if zip_path:
        assignments.append("zip_path = ?")
        params.append(zip_path)
    params.append(project_id)

    conn = sqlite3.connect(DB_PATH)
    try:
        conn.execute(
            f"UPDATE projects SET {', '.join(assignments)} WHERE id = ?",
            params,
        )
        conn.commit()
    finally:
        conn.close()
114
 
115
def get_project(project_id):
    """Return the project row with the given id, or None when absent."""
    sql = """
SELECT id, user_id, title, description, status, zip_path, logs
FROM projects WHERE id = ?
"""
    db = sqlite3.connect(DB_PATH)
    cur = db.cursor()
    cur.execute(sql, (project_id,))
    found = cur.fetchone()
    db.close()
    return found
125
 
126
def search_projects(user_id, query):
    """Return a user's projects whose title or description contains ``query``, newest first."""
    pattern = f"%{query}%"
    sql = """
SELECT id, title, description, status, zip_path
FROM projects WHERE user_id = ? AND (title LIKE ? OR description LIKE ?)
ORDER BY created_at DESC
"""
    db = sqlite3.connect(DB_PATH)
    cur = db.cursor()
    cur.execute(sql, (user_id, pattern, pattern))
    matches = cur.fetchall()
    db.close()
    return matches
137
-
138
- # ------------------------------ SYSTEM MONITORING ------------------------------
139
def get_ram_usage_gb():
    """Return the used system memory reported by psutil, in GiB rounded to 2 decimals."""
    used_bytes = psutil.virtual_memory().used
    bytes_per_gib = 1024**3
    return round(used_bytes / bytes_per_gib, 2)
141
-
142
def get_vram_usage_gb():
    """Return GPU memory in use (GiB, 2 decimals), or 0.0 without a usable GPU.

    The value is the larger of torch's allocated and reserved figures. Any
    error while querying CUDA is treated as "no GPU" and yields 0.0.
    """
    try:
        if not torch.cuda.is_available():
            return 0.0
        torch.cuda.synchronize()
        allocated = torch.cuda.memory_allocated() / (1024**3)
        reserved = torch.cuda.memory_reserved() / (1024**3)
        return round(max(allocated, reserved), 2)
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must still propagate.
        return 0.0
154
-
155
- # ------------------------------ MODEL LOADING (PER ROLE) ------------------------------
156
  MODEL_REGISTRY = {
157
- "understander": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
158
  "architect": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
159
- "ceo": "microsoft/Phi-3-mini-4k-instruct",
160
- "manager": "microsoft/Phi-3-mini-4k-instruct",
161
- "worker": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
162
  "reviewer": "microsoft/Phi-3-mini-4k-instruct",
163
- "editor": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
164
- "tester": "microsoft/Phi-3-mini-4k-instruct",
165
- "publisher": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
166
- "final_ceo": "microsoft/Phi-3-mini-4k-instruct",
167
  }
168
 
169
  _MODEL_CACHE = {}
@@ -171,434 +118,210 @@ _MODEL_CACHE = {}
171
def load_model(model_name):
    """Return the ``(tokenizer, model)`` pair for ``model_name``, loading it once.

    Loaded pairs are stored in ``_MODEL_CACHE`` keyed by model name.
    """
    cached = _MODEL_CACHE.get(model_name)
    if cached is not None:
        return cached

    # Half precision only when CUDA is available.
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=dtype,
        device_map="auto",
        trust_remote_code=True,
        attn_implementation="eager",  # avoids the flash-attn requirement
    )
    pair = (tokenizer, model)
    _MODEL_CACHE[model_name] = pair
    return pair
185
 
186
- # ------------------------------ PROMPTS PER ROLE ------------------------------
187
  ROLE_PROMPTS = {
188
- "understander": """
189
- You are the Understander. Your job is to deeply analyze the user's project request.
190
- Extract:
191
- - Core functionality
192
- - Required libraries
193
- - File structure
194
- - Edge cases
195
-
196
- Output only JSON:
197
- {
198
- "purpose": "string",
199
- "features": ["list of features"],
200
- "dependencies": ["list of required pip packages"],
201
- "files": [
202
- {"path": "filename.py", "type": "source|test|config|other"},
203
- ...
204
- ]
205
- }
206
- """,
207
-
208
- "architect": """
209
- You are the Architect. Based on the Understander's analysis, design the full project structure.
210
- Define:
211
- - Folder hierarchy
212
- - File contents (minimal skeleton)
213
- - Configuration files
214
-
215
- Output only JSON:
216
- {
217
- "structure": [
218
- {"path": "src/main.py", "content": "// empty file"},
219
- {"path": "requirements.txt", "content": "flask\\nrequests"}
220
- ],
221
- "notes": "Brief notes on architecture decisions"
222
- }
223
- """,
224
-
225
- "ceo": """
226
- You are the CEO. You oversee the entire project. You receive input from the Architect.
227
- Your job is to review the plan, ensure alignment with goals, and assign tasks to Managers.
228
- Respond only with: "Approved. Assigning to Managers."
229
- """,
230
-
231
- "manager": """
232
- You are a Manager. You receive a task from the CEO.
233
- Assign work to Workers based on file type.
234
- Example:
235
- - "Write src/main.py" → assign to Worker A
236
- - "Write tests/test_main.py" → assign to Worker B
237
-
238
- Return only JSON:
239
- {
240
- "tasks": [
241
- {"file": "src/main.py", "assigned_to": "worker", "instructions": "Write a Flask route..."},
242
- {"file": "tests/test_main.py", "assigned_to": "worker", "instructions": "Write a test for the /upload endpoint..."}
243
- ]
244
- }
245
- """,
246
-
247
- "worker": """
248
- You are a Worker. You write clean, functional code based on instructions.
249
- Never guess. Use tools if needed.
250
- Only output the raw file content — no markdown, no explanations.
251
- If writing Python, use proper imports, error handling, comments.
252
-
253
- File: {file}
254
- Instructions: {instructions}
255
- """,
256
-
257
- "reviewer": """
258
- You are the Reviewer. You audit all code written by Workers.
259
- Check for:
260
- - Security flaws (eval(), shell injection)
261
- - Missing error handling
262
- - Unused imports
263
- - Poor variable names
264
- - Logic errors
265
-
266
- For each issue, return:
267
- {
268
- "file": "path/to/file.py",
269
- "issue": "description",
270
- "suggestion": "fix suggestion"
271
  }
272
 
273
- If no issues: return {"issues": []}
274
- """,
275
-
276
- "editor": """
277
- You are the Editor. Apply all Reviewer suggestions to the code.
278
- Update files directly.
279
- Do not explain — just rewrite the file content.
280
- If a file doesn't exist, create it.
281
- Return only the updated file content.
282
- """,
283
-
284
- "tester": """
285
- You are a Tester. Write unit tests for each source file.
286
- Use pytest. Structure:
287
- - File: tests/test_{filename}.py
288
- - Use assert statements
289
- - Cover edge cases
290
-
291
- Only output the test file content — nothing else.
292
- """,
293
-
294
- "publisher": """
295
- You are the Publisher. Finalize the project.
296
- Generate:
297
- - README.md (plain text, no markdown formatting)
298
- - Dockerfile
299
- - requirements.txt (complete, deduplicated)
300
- - .gitignore
301
-
302
- Output each file as:
303
- {
304
- "README.md": "...",
305
- "Dockerfile": "...",
306
- "requirements.txt": "...",
307
- ".gitignore": "venv/\n__pycache__/"
308
- }
309
- """,
310
-
311
- "final_ceo": """
312
- You are the final CEO. You receive the packaged project from the Publisher.
313
- Verify everything is complete and correct.
314
- If yes: respond with "Ready to package."
315
- If no: list missing items.
316
- """
317
- }
318
 
319
- # ------------------------------ TOOLS (UNCHANGED) ------------------------------
320
def get_user_project_dir(user_id):
    """Return (creating it if needed) the directory ``PROJECT_ROOT/<user_id>``."""
    directory = os.path.join(PROJECT_ROOT, str(user_id))
    os.makedirs(directory, exist_ok=True)
    return directory
324
-
325
def clear_user_project_dir(user_id):
    """Delete every file and sub-directory inside the user's project directory."""
    import shutil

    base = get_user_project_dir(user_id)
    for name in os.listdir(base):
        target = os.path.join(base, name)
        if not os.path.isdir(target):
            os.remove(target)
            continue
        shutil.rmtree(target)
334
-
335
def create_file(user_id, path: str, content: str):
    """Write ``content`` to ``path`` inside the user's project directory.

    ``path`` is treated as relative to the user directory (leading slashes
    are stripped). Paths come from model output, so anything that resolves
    outside the user directory (for example via ``..``) is rejected.

    Returns:
        The string ``"Created: <path>"``.

    Raises:
        ValueError: if ``path`` resolves outside the user's directory.
    """
    user_dir = os.path.realpath(get_user_project_dir(user_id))
    full_path = os.path.realpath(os.path.join(user_dir, path.lstrip("/")))
    if full_path == user_dir or os.path.commonpath([user_dir, full_path]) != user_dir:
        raise ValueError(f"Refusing to write outside the project directory: {path!r}")
    os.makedirs(os.path.dirname(full_path), exist_ok=True)
    # Explicit UTF-8: generated content may contain non-ASCII characters.
    with open(full_path, 'w', encoding='utf-8') as f:
        f.write(content)
    return f"Created: {path}"
342
-
343
def read_file(user_id, path: str):
    """Return the text of ``path`` inside the user's project directory.

    Returns the string ``"File not found: <path>"`` when the path does not
    name a regular file or resolves outside the user's directory.
    """
    user_dir = os.path.realpath(get_user_project_dir(user_id))
    full_path = os.path.realpath(os.path.join(user_dir, path.lstrip("/")))
    inside = os.path.commonpath([user_dir, full_path]) == user_dir
    # isfile (not exists) so a directory name cannot reach open().
    if not inside or not os.path.isfile(full_path):
        return f"File not found: {path}"
    with open(full_path, 'r', encoding='utf-8') as f:
        return f.read()
350
 
351
def list_files(user_id):
    """Return every file under the user's project directory as a relative path."""
    base = get_user_project_dir(user_id)
    return [
        os.path.relpath(os.path.join(folder, name), base)
        for folder, _, names in os.walk(base)
        for name in names
    ]
358
 
359
def execute_code(user_id, code: str):
    """Run ``code`` with ``python3`` from a temp file in the user's directory.

    NOTE(review): this executes arbitrary (model-generated) code with the
    server's privileges; only the 10 second timeout limits it.

    Returns:
        stdout on exit code 0, ``"Error: <stderr>"`` on a non-zero exit, or
        ``"Timeout/Error: <message>"`` if running the script raised.
    """
    temp_path = None
    try:
        user_dir = get_user_project_dir(user_id)
        with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, dir=user_dir) as f:
            temp_path = f.name
            f.write(code)
        result = subprocess.run(["python3", temp_path], capture_output=True, text=True, timeout=10)
        if result.returncode == 0:
            return result.stdout
        return f"Error: {result.stderr}"
    except Exception as e:
        return f"Timeout/Error: {str(e)}"
    finally:
        # Remove the script on timeout/error too; it used to be left behind.
        if temp_path is not None and os.path.exists(temp_path):
            os.unlink(temp_path)
373
-
374
def run_tests(user_id):
    """Run pytest on the user's ``tests`` directory and return its stdout."""
    project_root = get_user_project_dir(user_id)
    tests_path = os.path.join(project_root, "tests")
    if os.path.exists(tests_path):
        completed = subprocess.run(
            ["pytest", tests_path], capture_output=True, text=True, cwd=project_root
        )
        return completed.stdout
    return "No test directory found."
381
-
382
def lint_code(user_id):
    """Run flake8 on the user's directory; return its output or a no-errors message."""
    project_root = get_user_project_dir(user_id)
    completed = subprocess.run(["flake8", project_root], capture_output=True, text=True)
    if completed.returncode != 0:
        return completed.stdout
    return "No linting errors."
386
-
387
def scan_vulns(user_id):
    """Run pip-audit on the user's requirements.txt and return its findings."""
    requirements_path = os.path.join(get_user_project_dir(user_id), "requirements.txt")
    if os.path.exists(requirements_path):
        completed = subprocess.run(
            ["pip-audit", "-r", requirements_path], capture_output=True, text=True
        )
        if completed.returncode != 0:
            return completed.stdout
        return "No known vulnerabilities found."
    return "No requirements.txt found."
393
-
394
def generate_dockerfile(user_id, content: str):
    """Write ``content`` to ``Dockerfile`` in the user's project and confirm."""
    target_name = "Dockerfile"
    create_file(user_id, target_name, content)
    return "Dockerfile generated."
397
-
398
def write_readme(user_id, content: str):
    """Write ``content`` to ``README.md`` in the user's project and confirm."""
    target_name = "README.md"
    create_file(user_id, target_name, content)
    return "README.md generated."
401
-
402
def write_requirements(user_id, content: str):
    """Write ``content`` to ``requirements.txt`` in the user's project and confirm."""
    target_name = "requirements.txt"
    create_file(user_id, target_name, content)
    return "requirements.txt generated."
405
-
406
def write_gitignore(user_id, content: str):
    """Write ``content`` to ``.gitignore`` in the user's project and confirm."""
    target_name = ".gitignore"
    create_file(user_id, target_name, content)
    return ".gitignore generated."
409
-
410
def zip_project(user_id, project_name: str):
    """Zip the user's project directory into ``<project_name>.zip`` and return its path.

    Files ending in ``.zip`` are left out of the archive.
    """
    base_dir = get_user_project_dir(user_id)
    archive_path = os.path.join(base_dir, f"{project_name}.zip")
    with zipfile.ZipFile(archive_path, 'w', zipfile.ZIP_DEFLATED) as archive:
        for folder, _, names in os.walk(base_dir):
            for name in names:
                if name.endswith(".zip"):
                    continue
                source = os.path.join(folder, name)
                archive.write(source, os.path.relpath(source, base_dir))
    return archive_path
421
 
422
- # ------------------------------ LLM GENERATOR (WITH CHAT TEMPLATE FOR PHI-3) ------------------------------
423
def generate_with_model(role: str, prompt: str, context: dict = None) -> str:
    """Generate a reply from the model registered for ``role``.

    Args:
        role: key into ``MODEL_REGISTRY`` and ``ROLE_PROMPTS``.
        prompt: user message sent after the role's system prompt.
        context: optional values substituted for ``{key}`` placeholders in
            the system prompt (e.g. ``{file}`` / ``{instructions}``).
            Plain replacement is used instead of ``str.format`` because the
            prompts contain literal JSON braces.

    Returns:
        The decoded completion, stripped; on any failure the string
        ``"[ERROR IN <ROLE>]: <message>"``.
    """
    try:
        model_name = MODEL_REGISTRY[role]
        tokenizer, model = load_model(model_name)

        system_prompt = ROLE_PROMPTS[role]
        for key, value in (context or {}).items():
            system_prompt = system_prompt.replace("{" + str(key) + "}", str(value))

        # Both model families are driven through the tokenizer's own chat template.
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True,
        )
        inputs = tokenizer(text, return_tensors="pt").to(model.device)

        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            temperature=0.2,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

        # Drop the prompt tokens so only the newly generated text is decoded.
        prompt_length = len(inputs["input_ids"][0])
        response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
        return response.strip()

    except Exception as e:
        return f"[ERROR IN {role.upper()}]: {str(e)}"
 
469
 
470
- # ------------------------------ ASYNC JOB EXECUTOR (WITH FULL LOGGING) ------------------------------
471
def run_agent_chain(project_id, user_id, initial_prompt):
    """Run the full agent pipeline for one project and record progress.

    Steps: Understander, Architect, CEO, Manager/Workers, Reviewer, Editor,
    Testers, Editor (tests), Publisher, Final CEO, then zip. After each step
    the accumulated log is saved through ``update_project_status``.

    Returns:
        ``"Completed successfully"`` or ``"Failed: <message>"``; the project
        row ends in status ``completed`` or ``failed`` accordingly.
    """
    # Created before the try block so the except handler can always read it.
    log_entries = []

    def log_step(agent, action, output):
        entry = f"[{agent}] {action}\n→ {output[:500]}{'...' if len(output) > 500 else ''}\n\n"
        log_entries.append(entry)
        update_project_status(project_id, "running", ''.join(log_entries))

    try:
        update_project_status(project_id, "running", f"Starting agent chain...\nRAM: {get_ram_usage_gb()}GB | VRAM: {get_vram_usage_gb()}GB")
        clear_user_project_dir(user_id)

        # Step 1: Understander
        log_step("Understander", "Analyzing prompt...", "Started...")
        understand_result = generate_with_model("understander", initial_prompt)
        log_step("Understander", "Analysis complete", understand_result)

        # Step 2: Architect
        log_step("Architect", "Designing structure...", "Started...")
        architect_result = generate_with_model("architect", understand_result)
        log_step("Architect", "Structure designed", architect_result)

        try:
            arch_data = json.loads(architect_result)
            for file in arch_data.get("structure", []):
                create_file(user_id, file["path"], file["content"])
        except Exception as e:
            log_step("Architect", "JSON parse failed", str(e))

        # Step 3: CEO (now receives the architecture it is asked to review)
        log_step("CEO", "Reviewing architecture...", "Started...")
        ceo_response = generate_with_model("ceo", f"Review architecture and approve.\n\n{architect_result}")
        log_step("CEO", "Response", ceo_response)

        # Step 4: Manager assigns tasks (also receives the architecture)
        log_step("Manager", "Assigning tasks...", "Started...")
        manager_response = generate_with_model("manager", f"Assign tasks based on architecture.\n\n{architect_result}")
        log_step("Manager", "Tasks assigned", manager_response)

        try:
            tasks = json.loads(manager_response).get("tasks", [])
            for task in tasks:
                # The task is put in the prompt itself; an empty prompt gave
                # the worker nothing to act on.
                worker_prompt = f"File: {task['file']}\nInstructions: {task['instructions']}"
                worker_response = generate_with_model("worker", worker_prompt, context={
                    "file": task["file"],
                    "instructions": task["instructions"],
                })
                create_file(user_id, task["file"], worker_response)
                log_step("Worker", f"Generated {task['file']}", worker_response[:300])
        except Exception as e:
            log_step("Manager", "Task parsing failed", str(e))

        # Step 5: Reviewer
        all_files = "\n".join(list_files(user_id))
        log_step("Reviewer", "Auditing code...", "Started...")
        reviewer_result = generate_with_model("reviewer", f"Review these files:\n\n{all_files}")
        log_step("Reviewer", "Audit complete", reviewer_result)

        # Step 6: Editor applies fixes
        log_step("Editor", "Applying fixes...", "Started...")
        generate_with_model("editor", f"Apply these changes:\n{reviewer_result}\n\nFiles:\n{all_files}")
        log_step("Editor", "Fixes applied", "Applied corrections to files.")

        # Step 7: Testers generate tests. The filter checks the base name so
        # files such as tests/test_x.py are not given tests of their own.
        sources = [
            f for f in list_files(user_id)
            if f.endswith(".py") and not os.path.basename(f).startswith("test_")
        ]
        for filename in sources:
            log_step("Tester", f"Generating test for {filename}...", "Started...")
            test_content = generate_with_model("tester", f"Write a test for this file: {filename}")
            test_path = f"tests/test_{os.path.basename(filename)}"
            create_file(user_id, test_path, test_content)
            log_step("Tester", f"Test saved to {test_path}", test_content[:200])

        # Step 8: Editor reviews tests
        test_files = "\n".join([f for f in list_files(user_id) if f.startswith("tests/")])
        log_step("Editor", "Reviewing tests...", "Started...")
        generate_with_model("editor", f"Review these test files:\n\n{test_files}\n\nMake sure they are complete and correct.")
        log_step("Editor", "Test review complete", "Reviewed and corrected test files.")

        # Step 9: Publisher creates final assets
        log_step("Publisher", "Generating final files...", "Started...")
        publisher_response = generate_with_model("publisher", "Generate final project assets: README.md, Dockerfile, requirements.txt, .gitignore")
        log_step("Publisher", "Final assets generated", publisher_response)

        writers = {
            "README.md": write_readme,
            "Dockerfile": generate_dockerfile,
            "requirements.txt": write_requirements,
            ".gitignore": write_gitignore,
        }
        try:
            for fname, content in json.loads(publisher_response).items():
                writer = writers.get(fname)
                if writer is not None:
                    writer(user_id, content)
        except Exception as e:
            log_step("Publisher", "JSON parsing failed", str(e))

        # Step 10: Final CEO
        log_step("Final CEO", "Final verification...", "Started...")
        final_ceo_response = generate_with_model("final_ceo", "All files generated. Verify completeness.")
        log_step("Final CEO", "Verification result", final_ceo_response)

        if "Ready to package" not in final_ceo_response:
            raise Exception(f"Final CEO rejected: {final_ceo_response}")

        # Step 11: ZIP & Save
        log_step("System", "Creating ZIP archive...", "Started...")
        zip_path = zip_project(user_id, f"project_{project_id}")
        log_step("System", "ZIP created", f"Saved to {zip_path}")

        update_project_status(project_id, "completed", ''.join(log_entries), zip_path)
        return "Completed successfully"

    except Exception as e:
        error_log = ''.join(log_entries) + f"\n\n❌ FINAL ERROR: {str(e)}"
        update_project_status(project_id, "failed", error_log)
        return f"Failed: {str(e)}"
 
 
599
 
600
  # ------------------------------ JOB QUEUE ------------------------------
601
- executor = concurrent.futures.ThreadPoolExecutor(max_workers=2)
 
602
 
603
def queue_job(project_id, user_id, prompt):
    """Schedule ``run_agent_chain`` for this project on the shared executor."""
    job_args = (project_id, user_id, prompt)
    executor.submit(run_agent_chain, *job_args)
 
1
+ # backend.py — REFACTORED FOR ROBUSTNESS & RELIABILITY
2
  import sqlite3
3
  import os
4
  import json
 
 
 
 
5
  import re
6
  import concurrent.futures
7
+ import traceback
8
+ import zipfile
9
+ import hashlib
10
+ from typing import Optional, Dict, Any
11
+
12
  import torch
13
  import psutil
14
+ from transformers import AutoTokenizer, AutoModelForCausalLM
15
 
16
+ # --- CONFIGURATION ---
17
  DB_PATH = "code_agents_pro.db"
18
  PROJECT_ROOT = "./projects"
19
  os.makedirs(PROJECT_ROOT, exist_ok=True)
20
 
21
+
22
+ # ------------------------------ DATABASE (REFACTORED WITH CONTEXT MANAGERS) ------------------------------
23
def init_db():
    """Create the ``users`` and ``projects`` tables and the status index if missing."""
    conn = sqlite3.connect(DB_PATH)
    try:
        conn.executescript("""
            CREATE TABLE IF NOT EXISTS users (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                username TEXT UNIQUE NOT NULL,
                password_hash TEXT NOT NULL
            );
            CREATE TABLE IF NOT EXISTS projects (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                user_id INTEGER NOT NULL,
                title TEXT,
                description TEXT,
                status TEXT DEFAULT 'queued',
                zip_path TEXT,
                logs TEXT DEFAULT '',
                created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (user_id) REFERENCES users(id)
            );
            CREATE INDEX IF NOT EXISTS idx_user_status ON projects(user_id, status);
        """)
        conn.commit()
    finally:
        # `with sqlite3.connect(...)` only scopes the transaction; the
        # connection has to be closed explicitly.
        conn.close()
 
47
 
48
  init_db()
49
 
50
+ # --- USER & PROJECT CRUD (ALL REFACTORED FOR SAFETY) ---
51
+
52
def hash_password(password):
    """Return the hex-encoded SHA-256 digest of ``password``."""
    digest = hashlib.sha256()
    digest.update(password.encode())
    return digest.hexdigest()
54
 
55
def verify_password(password, stored_hash):
    """Return True when ``password`` hashes to ``stored_hash``.

    Uses ``hmac.compare_digest`` so the comparison time does not reveal how
    much of the digest matched. A non-string ``stored_hash`` never matches.
    """
    import hmac

    if not isinstance(stored_hash, str):
        return False
    candidate = hash_password(password)
    # Compare as bytes: compare_digest rejects non-ASCII str arguments.
    return hmac.compare_digest(candidate.encode(), stored_hash.encode())
57
 
58
+ def _db_execute(query, params=(), fetchone=False, fetchall=False, commit=False):
59
+ """A centralized, safe way to interact with the database."""
60
+ try:
61
+ with sqlite3.connect(DB_PATH) as conn:
62
+ conn.row_factory = sqlite3.Row
63
+ cursor = conn.cursor()
64
+ cursor.execute(query, params)
65
+ if commit:
66
+ conn.commit()
67
+ return cursor.lastrowid
68
+ if fetchone:
69
+ return cursor.fetchone()
70
+ if fetchall:
71
+ return cursor.fetchall()
72
+ except sqlite3.Error as e:
73
+ print(f"Database error: {e}")
74
+ return None
75
+
76
def create_user(username, password):
    """Insert a user through ``_db_execute`` and return what it returns.

    An ``sqlite3.IntegrityError`` that reaches this function yields None.
    """
    insert_sql = "INSERT INTO users (username, password_hash) VALUES (?, ?)"
    try:
        new_id = _db_execute(insert_sql, (username, hash_password(password)), commit=True)
    except sqlite3.IntegrityError:
        new_id = None
    return new_id
83
 
84
def get_user_by_username(username):
    """Fetch the ``id`` and ``password_hash`` of ``username`` as a single row."""
    lookup_sql = "SELECT id, password_hash FROM users WHERE username = ?"
    return _db_execute(lookup_sql, (username,), fetchone=True)
 
 
 
 
 
86
 
87
def get_user_projects(user_id, limit=3):
    """Return up to ``limit`` of the user's project rows, newest first."""
    bound_values = (user_id, limit)
    return _db_execute(
        "SELECT * FROM projects WHERE user_id = ? ORDER BY created_at DESC LIMIT ?",
        bound_values,
        fetchall=True,
    )
 
 
 
 
 
 
 
90
 
91
def create_project(user_id, title, description):
    """Insert a project row and return the result of the committed insert."""
    bound_values = (user_id, title, description)
    return _db_execute(
        "INSERT INTO projects (user_id, title, description) VALUES (?, ?, ?)",
        bound_values,
        commit=True,
    )
 
 
 
 
 
 
 
 
94
 
95
def update_project_status(project_id, status, logs=None, zip_path=None):
    """Set a project's status; ``logs`` / ``zip_path`` are only replaced when not None.

    The SQL uses ``COALESCE`` so a None argument keeps the stored value.
    """
    bound_values = (status, logs, zip_path, project_id)
    _db_execute(
        "UPDATE projects SET status = ?, logs = COALESCE(?, logs), zip_path = COALESCE(?, zip_path) WHERE id = ?",
        bound_values,
        commit=True,
    )
 
 
 
 
 
 
 
 
 
 
98
 
99
def get_project(project_id):
    """Fetch the full ``projects`` row with the given id as a single row."""
    lookup_sql = "SELECT * FROM projects WHERE id = ?"
    return _db_execute(lookup_sql, (project_id,), fetchone=True)
 
 
 
 
 
 
 
 
101
 
102
def search_projects(user_id, query):
    """Return the user's projects whose title or description contains ``query``."""
    pattern = f"%{query}%"
    return _db_execute(
        "SELECT id, title, description, status FROM projects WHERE user_id = ? AND (title LIKE ? OR description LIKE ?)",
        (user_id, pattern, pattern),
        fetchall=True,
    )
105
+
106
+ # ------------------------------ MODEL LOADING & CACHING ------------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  MODEL_REGISTRY = {
108
+ "planner": "microsoft/Phi-3-mini-4k-instruct",
109
  "architect": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
110
+ "coder": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
 
 
111
  "reviewer": "microsoft/Phi-3-mini-4k-instruct",
112
+ "tester": "Qwen/Qwen2.5-Coder-0.6B-Instruct",
113
+ "publisher": "microsoft/Phi-3-mini-4k-instruct",
 
 
114
  }
115
 
116
  _MODEL_CACHE = {}
 
118
def load_model(model_name):
    """Return the ``(tokenizer, model)`` pair for ``model_name``, loading it once.

    Loaded pairs are kept in ``_MODEL_CACHE``; progress is printed.
    """
    cached = _MODEL_CACHE.get(model_name)
    if cached is not None:
        return cached

    print(f"Loading model: {model_name}...")
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    load_options = {
        "torch_dtype": "auto",
        "device_map": "auto",
        "trust_remote_code": True,
        "attn_implementation": "eager",
    }
    model = AutoModelForCausalLM.from_pretrained(model_name, **load_options)
    pair = (tokenizer, model)
    _MODEL_CACHE[model_name] = pair
    print(f"Model {model_name} loaded and cached.")
    return pair
134
 
135
+ # ------------------------------ AGENT PROMPTS ------------------------------
136
  ROLE_PROMPTS = {
137
+ "planner": """You are an expert software planner. Your task is to break down a user's request into a detailed, actionable plan.
138
+ - Clarify the core purpose.
139
+ - List the key features.
140
+ - Determine the necessary libraries and dependencies.
141
+ - Define a complete file structure.
142
+ Output ONLY a single JSON object with the keys: "purpose", "features", "dependencies", "files".
143
+ The "files" key should be an array of strings representing file paths (e.g., ["src/main.py", "tests/test_main.py"]).""",
144
+
145
+ "architect": """You are a software architect. Based on the file plan, you will create the initial content for each file.
146
+ - For configuration files (like requirements.txt), list the dependencies.
147
+ - For source code files, write a brief comment or placeholder (e.g., '# TODO: Implement main logic').
148
+ - For documentation (README.md), write a title.
149
+ Output ONLY a single JSON object where keys are file paths and values are the initial content.""",
150
+
151
+ "coder": """You are a professional programmer. Your only job is to write clean, functional, and complete code for a given file based on instructions.
152
+ - You must write the full code for the file path provided.
153
+ - Do NOT add any explanations, introductions, or markdown formatting.
154
+ - Your output must be ONLY the raw source code for the file.""",
155
+
156
+ "reviewer": """You are a meticulous code reviewer. Your task is to analyze a file's code and identify potential bugs, style issues, or missing features.
157
+ - Check for logical errors, security vulnerabilities, and bad practices.
158
+ - Ensure the code is readable and well-commented.
159
+ - Output ONLY a single JSON object with two keys: "has_issues" (boolean) and "suggestions" (a string containing a bulleted list of required changes). If no issues, "has_issues" is false.""",
160
+
161
+ "tester": """You are a quality assurance engineer specializing in automated testing. Your task is to write a complete pytest test file for a given source code file.
162
+ - Cover the main functionality and edge cases.
163
+ - Use appropriate assertions.
164
+ - Your output must be ONLY the raw source code for the test file.""",
165
+
166
+ "publisher": """You are a release manager. Your job is to create the final project documentation and configuration based on the complete file structure.
167
+ - Create a helpful README.md that explains the project's purpose and how to run it.
168
+ - Create a standard .gitignore file.
169
+ - Create a basic Dockerfile for containerization.
170
+ Output ONLY a single JSON object where keys are the filenames ("README.md", ".gitignore", "Dockerfile") and values are their complete content."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171
  }
172
 
173
+ # ------------------------------ FILE SYSTEM TOOLS ------------------------------
174
def get_project_dir(user_id, project_id):
    """Return (creating it if needed) ``PROJECT_ROOT/<user_id>/<project_id>``."""
    segments = (PROJECT_ROOT, str(user_id), str(project_id))
    directory = os.path.join(*segments)
    os.makedirs(directory, exist_ok=True)
    return directory
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
def create_file(project_dir, path, content):
    """Write ``content`` (UTF-8) to ``path`` inside ``project_dir``.

    ``path`` is treated as relative to ``project_dir``: leading slashes are
    stripped, and parent directories are created as needed. File paths
    originate from model output, so a path that resolves outside
    ``project_dir`` (absolute path, ``..`` segments) is rejected.

    Raises:
        ValueError: if ``path`` does not resolve to a location inside
            ``project_dir``.
    """
    root = os.path.realpath(project_dir)
    target = os.path.realpath(os.path.join(root, path.lstrip("/\\")))
    if target == root or os.path.commonpath([root, target]) != root:
        raise ValueError(f"Refusing to write outside the project directory: {path!r}")
    os.makedirs(os.path.dirname(target), exist_ok=True)
    with open(target, 'w', encoding='utf-8') as f:
        f.write(content)
 
 
 
 
 
 
 
 
 
184
 
185
def read_file(project_dir, path):
    """Return the UTF-8 text of ``path`` inside ``project_dir``, or None.

    None is returned when the path is not a regular file or resolves
    outside ``project_dir``.
    """
    root = os.path.realpath(project_dir)
    target = os.path.realpath(os.path.join(root, path))
    # isfile (not exists) so a directory name cannot reach open().
    if os.path.commonpath([root, target]) != root or not os.path.isfile(target):
        return None
    with open(target, 'r', encoding='utf-8') as f:
        return f.read()
 
 
190
 
191
def zip_project(project_dir, project_id):
    """Zip ``project_dir`` into ``project_<project_id>.zip`` next to it.

    The archive is written to the parent of ``project_dir`` (the user
    folder). Trailing path separators on ``project_dir`` are ignored;
    otherwise ``dirname`` would return the project directory itself and the
    archive would be created inside the tree being zipped.

    Returns:
        Path of the created zip file.
    """
    separators = os.sep + (os.altsep or "")
    root = project_dir.rstrip(separators) or project_dir
    zip_path = os.path.join(os.path.dirname(root), f"project_{project_id}.zip")
    zip_real = os.path.realpath(zip_path)
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        for folder, _, files in os.walk(root):
            for name in files:
                full_path = os.path.join(folder, name)
                # Never add the archive to itself.
                if os.path.realpath(full_path) == zip_real:
                    continue
                zf.write(full_path, os.path.relpath(full_path, root))
    return zip_path
201
 
202
+ # ------------------------------ CORE AI & AGENT LOGIC ------------------------------
203
+ def _extract_json(text: str) -> Optional[Dict[str, Any]]:
204
+ """Robustly extracts a JSON object from a string, even if wrapped in markdown."""
205
+ match = re.search(r"```json\s*([\s\S]*?)\s*```|(\{[\s\S]*\})", text)
206
+ if not match:
207
+ return None
208
+ json_str = match.group(1) or match.group(2)
209
+ try:
210
+ return json.loads(json_str)
211
+ except json.JSONDecodeError:
212
+ print(f"Failed to decode JSON: {json_str[:200]}...")
213
+ return None
214
+
215
def generate_with_model(role: str, prompt: str) -> str:
    """Generate a response for ``prompt`` using the model registered for ``role``.

    The role's system prompt (``ROLE_PROMPTS[role]``) and the user prompt are
    rendered through the tokenizer's chat template, and up to 2048 new tokens
    are generated. Only the newly generated tokens are decoded.

    Args:
        role: Key into ``MODEL_REGISTRY`` and ``ROLE_PROMPTS``.
        prompt: User-turn content.

    Returns:
        The stripped model response. On any failure this function does not
        raise; it returns a JSON object string of the form
        ``{"error": "Failed to generate response: ..."}``.
    """
    try:
        model_name = MODEL_REGISTRY[role]
        # load_model is defined elsewhere in this file (presumably cached — confirm there).
        tokenizer, model = load_model(model_name)

        messages = [{"role": "system", "content": ROLE_PROMPTS[role]}, {"role": "user", "content": prompt}]
        input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
        inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

        outputs = model.generate(**inputs, max_new_tokens=2048, pad_token_id=tokenizer.eos_token_id)
        # Slice off the prompt tokens so only the completion is decoded.
        response = tokenizer.decode(outputs[0][len(inputs.input_ids[0]):], skip_special_tokens=True)
        return response.strip()

    except Exception as e:
        print(f"Error during model generation for role {role}: {e}")
        # json.dumps escapes quotes/backslashes/newlines in the message, so the
        # payload is always valid JSON (hand-built interpolation was not).
        return json.dumps({"error": f"Failed to generate response: {e}"})
232
 
233
+ # ------------------------------ THE AGENT CHAIN EXECUTOR ------------------------------
234
def run_agent_chain(project_id, user_id, initial_prompt):
    """Run the full agent pipeline for one project and record its progress.

    Stages, in order: planner -> architect -> coder -> reviewer -> tester ->
    publisher -> zip packaging. After every step the accumulated log is
    persisted via ``update_project_status`` with status ``"running"``. The
    final status is ``"completed"`` (with the ZIP path) on success or
    ``"failed"`` (with the traceback appended to the log) on any exception.

    Args:
        project_id: Id of the project row to update.
        user_id: Owner id, used to locate the project directory.
        initial_prompt: The user's request, passed to the planner.

    Returns:
        None. Exceptions raised by the pipeline are caught and recorded, not
        propagated.
    """
    log_entries = []

    def format_entry(agent, action, output=""):
        """Render one markdown log entry; long output is truncated to 1000 chars."""
        entry = f"**[{agent.upper()}]**: {action}\n"
        if output:
            if not isinstance(output, str):
                # e.g. reviewer "suggestions" may be a list or dict.
                try:
                    output = json.dumps(output, indent=2)
                except (TypeError, ValueError):
                    output = str(output)
            entry += f"```\n{output[:1000]}{'...' if len(output) > 1000 else ''}\n```\n---\n"
        return entry

    def log_step(agent, action, output=""):
        """Append an entry and persist the whole log with status "running"."""
        log_entries.append(format_entry(agent, action, output))
        update_project_status(project_id, "running", logs="".join(log_entries))

    def ask(role, prompt):
        """Call the model and fail loudly if it returned its error payload."""
        response = generate_with_model(role, prompt)
        # generate_with_model reports failures as a JSON string with this
        # prefix; without this check that string would be saved as "code".
        if response.startswith('{"error": "Failed to generate response:'):
            raise RuntimeError(f"Model call for role '{role}' failed: {response}")
        return response

    try:
        # Placeholder first, so it cannot overwrite a real log entry.
        update_project_status(project_id, "running", logs="Agent team is assembling...")
        # Inside the try so a filesystem error marks the job as failed.
        project_dir = get_project_dir(user_id, project_id)
        log_step("SYSTEM", f"Initializing project directory: {project_dir}")

        # 1. PLANNER
        log_step("PLANNER", "Analyzing user request and creating a project plan...")
        plan_response = ask("planner", initial_prompt)
        plan_data = _extract_json(plan_response)
        if not plan_data or not isinstance(plan_data, dict):
            raise ValueError("Planner failed to create a valid JSON plan.")
        planned_files = plan_data.get("files")
        if not isinstance(planned_files, list) or not planned_files:
            raise ValueError("Planner's plan is missing a non-empty 'files' list.")
        # Fall back to the user's request if the plan omits a purpose.
        purpose = plan_data.get("purpose") or initial_prompt
        log_step("PLANNER", "Plan created successfully.", json.dumps(plan_data, indent=2))

        # 2. ARCHITECT
        log_step("ARCHITECT", "Creating initial file skeletons based on the plan...")
        arch_prompt = f"Create initial content for the following files:\n{json.dumps(planned_files)}"
        arch_response = ask("architect", arch_prompt)
        arch_data = _extract_json(arch_response)
        if not arch_data or not isinstance(arch_data, dict):
            raise ValueError("Architect failed to create valid JSON file structures.")
        for path, content in arch_data.items():
            create_file(project_dir, path, content)
        log_step("ARCHITECT", "File skeletons created.", "\n".join(arch_data.keys()))

        # 3. CODER
        source_files = [
            f for f in planned_files
            if isinstance(f, str) and f.startswith('src/') and f.endswith('.py')
        ]
        for file_path in source_files:
            log_step("CODER", f"Writing complete code for `{file_path}`...")
            coder_prompt = f"Based on the project plan ({purpose}), write the full Python code for the file: `{file_path}`."
            code = ask("coder", coder_prompt)
            create_file(project_dir, file_path, code)
            log_step("CODER", f"Finished writing code for `{file_path}`.", code)

        # 4. REVIEWER
        log_step("REVIEWER", "Reviewing all generated source code for issues...")
        for file_path in source_files:
            code_content = read_file(project_dir, file_path)
            review_prompt = f"Review the following code from `{file_path}`:\n\n{code_content}"
            review_response = ask("reviewer", review_prompt)
            review_data = _extract_json(review_response)
            if isinstance(review_data, dict) and review_data.get("has_issues"):
                log_step("REVIEWER", f"Found issues in `{file_path}`.", review_data.get("suggestions"))
                # In a more advanced version, this would trigger the CODER again (a "fix loop")
            else:
                log_step("REVIEWER", f"No major issues found in `{file_path}`.")

        # 5. TESTER
        log_step("TESTER", "Writing unit tests for all source code...")
        for file_path in source_files:
            code_content = read_file(project_dir, file_path)
            test_file_path = os.path.join("tests", f"test_{os.path.basename(file_path)}")
            tester_prompt = f"Write a complete pytest test file named `{test_file_path}` for the following code from `{file_path}`:\n\n{code_content}"
            test_code = ask("tester", tester_prompt)
            create_file(project_dir, test_file_path, test_code)
            log_step("TESTER", f"Generated test file `{test_file_path}`.", test_code)

        # 6. PUBLISHER
        log_step("PUBLISHER", "Generating final documentation and configuration files...")
        pub_prompt = f"The project has the following files: {json.dumps(list(os.walk(project_dir)))}. Generate README.md, .gitignore, and Dockerfile."
        pub_response = ask("publisher", pub_prompt)
        pub_data = _extract_json(pub_response)
        if not pub_data or not isinstance(pub_data, dict):
            raise ValueError("Publisher failed to create valid final assets.")
        for path, content in pub_data.items():
            create_file(project_dir, path, content)
        log_step("PUBLISHER", "Final assets created.", json.dumps(pub_data, indent=2))

        # 7. FINALIZATION
        log_step("SYSTEM", "Packaging the complete project into a ZIP file...")
        zip_path = zip_project(project_dir, project_id)
        # Append the closing entry WITHOUT log_step: log_step writes status
        # "running", which would undo the "completed" status set below.
        log_entries.append(
            format_entry("SYSTEM", "Project completed successfully!", f"Download available at: {zip_path}")
        )
        update_project_status(project_id, "completed", logs="".join(log_entries), zip_path=zip_path)

    except Exception as e:
        tb_str = traceback.format_exc()
        print(f"--- AGENT CHAIN FAILED for project {project_id} ---\n{tb_str}\n--------------------")
        # Newline after the opening fence so the message is not parsed as the
        # fence's language tag when the log is rendered as markdown.
        error_log = "".join(log_entries) + f"\n\n❌ **CRITICAL ERROR:**\nAn unexpected error occurred and the process had to stop.\n\n**Details:**\n```\n{str(e)}\n\n{tb_str}```"
        update_project_status(project_id, "failed", logs=error_log)
319
+
320
 
321
  # ------------------------------ JOB QUEUE ------------------------------
322
# Set to 1 worker to prevent VRAM OOM errors by running models sequentially.
executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)


def _report_job_failure(future):
    """Done-callback: print any exception that escaped a background job."""
    if future.cancelled():
        return
    exc = future.exception()
    if exc is not None:
        print(f"Background job crashed with an unhandled error: {exc!r}")


def queue_job(project_id, user_id, prompt):
    """Schedule ``run_agent_chain`` for a project on the shared executor.

    Returns immediately; the job runs on the executor's single worker
    thread, so queued jobs execute one at a time in submission order.

    Args:
        project_id: Id of the project to build.
        user_id: Id of the project's owner.
        prompt: The user's request, forwarded to the agent chain.
    """
    print(f"Queuing job for project_id: {project_id}, user_id: {user_id}")
    future = executor.submit(run_agent_chain, project_id, user_id, prompt)
    # The future is otherwise discarded, so without this callback an
    # exception raised outside run_agent_chain's own handler (e.g. while
    # recording the failure) would disappear silently.
    future.add_done_callback(_report_job_failure)