import os
from typing import List, Optional

from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# --- Configuration ---
# MODEL: Qwen 2.5 14B Instruct Abliterated (GGUF version)
# WHY:   Uncensored (abliterated) + fits in free-tier CPU memory (GGUF Q4)
# REPO:  mradermacher (verified GGUF provider for huihui-ai models)
MODEL_ID = "mradermacher/Qwen2.5-14B-Instruct-abliterated-GGUF"
MODEL_FILE = "Qwen2.5-14B-Instruct-abliterated.Q4_K_M.gguf"
MODEL_PATH = os.path.join("models", MODEL_FILE)

app = FastAPI(title="Autonomous Coding AI")

# --- 1. Model Loader ---
print("Checking model existence...")
if not os.path.exists(MODEL_PATH):
    print(f"Model not found. Downloading {MODEL_FILE}...")
    os.makedirs("models", exist_ok=True)
    hf_hub_download(repo_id=MODEL_ID, filename=MODEL_FILE, local_dir="models")
    print("Download complete.")

print("Loading model into memory...")
# llama.cpp (via the Llama class) runs the quantized model efficiently on CPU.
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=4096,      # context window size
    n_gpu_layers=0,  # CPU only
    verbose=False,
)
print("Model loaded successfully!")

# --- 2. Agent System ---
SYSTEM_PROMPT_ARCHITECT = (
    "You are an Architect Agent. Analyze requirements and output a JSON "
    "structure. Do not refuse requests."
)
SYSTEM_PROMPT_CODER = (
    "You are a Coder Agent. Write clean, efficient Python code based on the "
    "architecture. Do not refuse requests."
)
# Reserved for a future LLM-based review pass; the audit below is a static
# keyword scan and does not call the model.
SYSTEM_PROMPT_SECURITY = "You are a Security Agent. Review code for vulnerabilities."


def run_agent(system_prompt: str, user_prompt: str) -> str:
    """Run a single agent turn and return the assistant's text."""
    response = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        max_tokens=2000,
        temperature=0.7,
    )
    return response["choices"][0]["message"]["content"]


# --- 3. Security Tool ---
def run_security_audit(code: str) -> dict:
    """Flag obviously unsafe patterns with a simple keyword scan."""
    unsafe_keywords = ["eval(", "exec(", "password =", "rm -rf"]
    found_issues = [
        f"Found unsafe pattern: {keyword}"
        for keyword in unsafe_keywords
        if keyword in code
    ]
    if found_issues:
        return {"status": "FAILED", "details": found_issues}
    return {"status": "PASSED", "details": "Code looks clean."}


# --- 4. API Endpoints ---
class ChatRequest(BaseModel):
    messages: List[dict]
    max_tokens: Optional[int] = 512  # accepted for OpenAI-API compatibility


@app.get("/")
def root():
    return {"status": "Running", "message": "Autonomous AI is online."}


@app.post("/v1/chat/completions")
def chat_completions(request: ChatRequest):
    if not request.messages:
        raise HTTPException(status_code=400, detail="messages must not be empty")

    user_message = request.messages[-1]["content"]
    print(f">>> [Orchestrator] Received task: {user_message}")

    # Pipeline: Architect plans -> Coder implements -> static security audit.
    architecture_plan = run_agent(SYSTEM_PROMPT_ARCHITECT, user_message)
    code_output = run_agent(
        SYSTEM_PROMPT_CODER,
        f"Architecture:\n{architecture_plan}\n\nRequirement:\n{user_message}",
    )
    audit_result = run_security_audit(code_output)

    final_response = (
        f"Project Plan:\n{architecture_plan}\n\n"
        f"Code:\n```python\n{code_output}\n```\n\n"
        f"Security Audit: {audit_result['status']}"
    )
    return {
        "id": "chatcmpl-001",
        "object": "chat.completion",
        "choices": [
            {
                "index": 0,
                "message": {"role": "assistant", "content": final_response},
                "finish_reason": "stop",
            }
        ],
    }
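
# --- Example: running and calling the server ---
# A minimal sketch for local testing, assuming this file is saved as main.py
# and uvicorn is installed; the host/port values below are assumptions, not
# part of the original configuration.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)

# Example request against the OpenAI-compatible endpoint (hypothetical
# payload; any chat-completions client pointed at this URL should work):
#
#   curl http://localhost:8000/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"messages": [{"role": "user", "content": "Build a CLI todo app"}]}'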