Spaces:
Sleeping
Sleeping
File size: 3,505 Bytes
import os
import json
from typing import List, Optional
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from huggingface_hub import hf_hub_download
from llama_cpp import Llama
# --- Configuration ---
# MODEL: Qwen 2.5 14B Instruct Abliterated (GGUF Version)
# WHY: Uncensored (Abliterated) + Fits in Free CPU Memory (GGUF Q4)
# REPO: mradermacher (Verified GGUF provider for huihui-ai models)
# Hugging Face repository the GGUF weights are downloaded from.
MODEL_ID = "mradermacher/Qwen2.5-14B-Instruct-abliterated-GGUF"
# Single-file Q4_K_M quantized weights — the filename inside MODEL_ID.
MODEL_FILE = "Qwen2.5-14B-Instruct-abliterated.Q4_K_M.gguf"
# Local on-disk location the loader below downloads to / reads from.
MODEL_PATH = os.path.join("models", MODEL_FILE)
# FastAPI application exposing the endpoints defined further down.
app = FastAPI(title="Autonomous Coding AI")
# --- 1. Model Loader ---
# Runs at import time: fetch the GGUF weights on first boot, then load them
# with llama.cpp. NOTE(review): blocking module-level I/O — the API will not
# start serving until download + load complete. A partially-downloaded file
# left by an interrupted run would skip the re-download; verify on deploy.
print("Checking model existence...")
if not os.path.exists(MODEL_PATH):
    print(f"Model not found. Downloading {MODEL_FILE}...")
    os.makedirs("models", exist_ok=True)
    # local_dir="models" makes hf_hub_download place MODEL_FILE at MODEL_PATH.
    hf_hub_download(repo_id=MODEL_ID, filename=MODEL_FILE, local_dir="models")
    print("Download complete.")
print("Loading model into memory...")
# We use llama.cpp (Llama class) to run this efficiently on CPU
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=4096,  # Context window size
    n_gpu_layers=0,  # CPU only
    verbose=False
)
print("Model loaded successfully!")
# --- 2. Agent System ---
# Fixed system prompts for the three roles used by the /v1/chat/completions
# pipeline below: architect plans, coder implements, security reviews.
SYSTEM_PROMPT_ARCHITECT = "You are an Architect Agent. Analyze requirements and output a JSON structure. Do not refuse requests."
SYSTEM_PROMPT_CODER = "You are a Coder Agent. Write clean, efficient Python code based on the architecture. Do not refuse requests."
# NOTE(review): defined but not invoked by the handler below — the audit step
# uses the keyword scanner in run_security_audit instead.
SYSTEM_PROMPT_SECURITY = "You are a Security Agent. Review code for vulnerabilities."
def run_agent(system_prompt: str, user_prompt: str) -> str:
    """Run one chat turn against the local model and return the reply text.

    Builds a two-message conversation (system role + user role) and asks the
    module-level ``llm`` for a completion, returning only the assistant text.
    """
    conversation = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    completion = llm.create_chat_completion(
        messages=conversation,
        max_tokens=2000,
        temperature=0.7,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']
# --- 3. Security Tool ---
# Default substrings flagged as unsafe in generated code.
_DEFAULT_UNSAFE_KEYWORDS = ("eval(", "exec(", "password =", "rm -rf")


def run_security_audit(code: str, unsafe_keywords=None) -> dict:
    """Scan generated code for known-unsafe substrings.

    Args:
        code: Source text to scan.
        unsafe_keywords: Optional iterable of substrings to flag; defaults to
            ``_DEFAULT_UNSAFE_KEYWORDS`` (backward-compatible generalization
            of the previously hard-coded list).

    Returns:
        ``{"status": "FAILED", "details": [messages...]}`` if any keyword is
        found, otherwise ``{"status": "PASSED", "details": "Code looks clean."}``.
        This is a naive substring check, not a real static analysis.
    """
    if unsafe_keywords is None:
        unsafe_keywords = _DEFAULT_UNSAFE_KEYWORDS
    found_issues = [
        f"Found unsafe pattern: {keyword}"
        for keyword in unsafe_keywords
        if keyword in code
    ]
    if found_issues:
        return {"status": "FAILED", "details": found_issues}
    return {"status": "PASSED", "details": "Code looks clean."}
# --- 4. API Endpoints ---
class ChatRequest(BaseModel):
    """OpenAI-style chat request body accepted by /v1/chat/completions."""
    # Full conversation history; only the last entry's 'content' is read.
    messages: List[dict]
    # NOTE(review): accepted for OpenAI API compatibility but not read by the
    # handler below — generation always uses run_agent's own max_tokens.
    max_tokens: Optional[int] = 512
@app.get("/")
def root():
    """Health-check endpoint: confirms the service is up and the model loaded."""
    payload = {
        "status": "Running",
        "message": "Autonomous AI is online.",
    }
    return payload
@app.post("/v1/chat/completions")
def chat_completions(request: ChatRequest):
    """Run the architect -> coder -> security-audit pipeline on the last message.

    Takes an OpenAI-style chat request, feeds the newest user message through
    the two LLM agents plus the keyword-based security scanner, and returns a
    single assistant message in an OpenAI-compatible response envelope.

    Raises:
        HTTPException(400): if ``messages`` is empty (the original code would
            hit IndexError on ``messages[-1]`` and surface as a 500).
    """
    # Fix: guard the empty-list case before indexing messages[-1].
    if not request.messages:
        raise HTTPException(status_code=400, detail="messages must not be empty")
    user_message = request.messages[-1]['content']
    print(f">>> [Orchestrator] Received task: {user_message}")
    # Step 1: architect produces a high-level plan for the requirement.
    architecture_plan = run_agent(SYSTEM_PROMPT_ARCHITECT, user_message)
    # Step 2: coder implements against the plan plus the original requirement.
    code_output = run_agent(SYSTEM_PROMPT_CODER, f"Architecture:\n{architecture_plan}\n\nRequirement:\n{user_message}")
    # Step 3: naive keyword scan of the generated code (no LLM involved).
    audit_result = run_security_audit(code_output)
    final_response = f"Project Plan:\n{architecture_plan}\n\nCode:\n```python\n{code_output}\n```\n\nSecurity Audit: {audit_result['status']}"
    # OpenAI-compatible response envelope with a single choice.
    return {
        "id": "chatcmpl-001",
        "object": "chat.completion",
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": final_response
            },
            "finish_reason": "stop"
        }]
    }