PoppaYAO committed on
Commit
e58f496
·
verified ·
1 Parent(s): 2ccaeaa

Update server.py

Browse files
Files changed (1) hide show
  1. server.py +124 -23
server.py CHANGED
@@ -1,33 +1,134 @@
1
- from fastapi import FastAPI
2
- import subprocess
3
  import os
 
 
 
 
 
 
 
4
 
5
- app = FastAPI()
 
 
 
6
 
7
- MODEL_URL = "https://huggingface.co/TheBloke/deepseek-coder-v2-lite-GGUF/resolve/main/deepseek-coder-v2-lite.Q4_K_M.gguf"
8
- MODEL_PATH = "deepseek-coder.gguf"
9
 
10
- def download_model():
11
- if not os.path.exists(MODEL_PATH):
12
- subprocess.run(["wget", MODEL_URL, "-O", MODEL_PATH])
 
 
 
 
13
 
14
- download_model()
 
 
 
 
 
 
 
15
 
16
- @app.get("/")
17
- def read_root():
18
- return {"status": "DeepSeek server running"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
- @app.post("/generate")
21
- def generate(prompt: str):
 
 
 
22
 
23
- cmd = [
24
- "/app/llama.cpp/build/bin/llama-cli",
25
- "-m", MODEL_PATH,
26
- "-p", prompt,
27
- "-n", "200",
28
- "--ctx-size", "8192"
29
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
- result = subprocess.run(cmd, capture_output=True, text=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
 
33
- return {"response": result.stdout}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ import json
3
+ import subprocess
4
+ from typing import List, Optional
5
+ from fastapi import FastAPI, HTTPException
6
+ from pydantic import BaseModel
7
+ from huggingface_hub import hf_hub_download
8
+ from llama_cpp import Llama
9
 
10
# --- Configuration ---
# Hugging Face repo and quantized GGUF file name for the coder model.
MODEL_ID = "mjrdbld/deepseek-coder-v2-lite-instruct-gguf"
MODEL_FILE = "deepseek-coder-v2-lite-instruct.Q4_K_M.gguf"
# Local path the model is downloaded to and loaded from.
MODEL_PATH = os.path.join("models", MODEL_FILE)

app = FastAPI(title="Autonomous Coding AI")

# --- 1. Model Loader (The Engine) ---
# NOTE(review): this runs at import time, so the server blocks on the
# download and model load before it can answer any request.
print("Checking model existence...")
if not os.path.exists(MODEL_PATH):
    print("Model not found. Downloading...")
    os.makedirs("models", exist_ok=True)
    # hf_hub_download writes MODEL_FILE into ./models, which matches MODEL_PATH.
    hf_hub_download(repo_id=MODEL_ID, filename=MODEL_FILE, local_dir="models")
    print("Download complete.")

print("Loading model into memory (this takes a minute)...")
llm = Llama(
    model_path=MODEL_PATH,
    n_ctx=4096,  # Context window size
    n_gpu_layers=0,  # 0 for CPU only
    verbose=False
)
print("Model loaded successfully!")
33
 
34
# --- 2. Agent System (The Brain) ---
# We are creating a "Native" Agency system to keep it lightweight and free.

# Role prompt for the planning agent: must emit a JSON structure only, no code.
SYSTEM_PROMPT_ARCHITECT = """
You are an Architect Agent. Your job is to analyze user requirements and output a JSON project structure.
Do not write code. Only output JSON.
Example Output:
{
"project_name": "todo_app",
"files": ["main.py", "utils.py"],
"tech_stack": ["Python", "FastAPI"]
}
"""

# Role prompt for the implementation agent: must emit only the code block.
SYSTEM_PROMPT_CODER = """
You are a Coder Agent. You write clean, efficient Python code based on the architecture provided.
You must output only the code block.
"""

# Role prompt for the review agent; fixed PASSED/ALERT output contract.
# NOTE(review): defined but never passed to run_agent anywhere in this file —
# the actual audit is done by run_security_audit below.
SYSTEM_PROMPT_SECURITY = """
You are a Security Agent. You review code for vulnerabilities.
If the code is safe, output: 'SECURITY CHECK PASSED'.
If unsafe, output: 'SECURITY ALERT: [reason]'.
"""
58
 
59
def run_agent(system_prompt: str, user_prompt: str) -> str:
    """Run one chat completion with the model acting in a given role.

    ``system_prompt`` selects the agent persona; ``user_prompt`` carries the
    task text. Returns the assistant message content as a string.
    """
    chat = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    completion = llm.create_chat_completion(
        messages=chat,
        max_tokens=2000,
        temperature=0.7,
    )
    first_choice = completion['choices'][0]
    return first_choice['message']['content']
70
+
71
# --- 3. Tool: Security Validator (using promptfoo) ---
def run_security_audit(code: str, unsafe_keywords: Optional[List[str]] = None) -> dict:
    """
    Scan generated code for obviously unsafe substring patterns.

    For this demo this is a lightweight Python check; in production it
    calls the real 'promptfoo' CLI.

    Args:
        code: Source text to audit.
        unsafe_keywords: Optional override of the patterns to flag.
            Defaults to the built-in deny-list (backward compatible).

    Returns:
        ``{"status": "FAILED", "details": [<issue>, ...]}`` when any pattern
        is found, otherwise ``{"status": "PASSED", "details": "Code looks clean."}``.
    """
    if unsafe_keywords is None:
        # Built per call to avoid a shared mutable default argument.
        unsafe_keywords = ["eval(", "exec(", "password =", "rm -rf"]

    # Comprehension instead of the manual append loop; same match semantics
    # (case-sensitive substring containment).
    found_issues = [
        f"Found unsafe pattern: {keyword}"
        for keyword in unsafe_keywords
        if keyword in code
    ]

    if found_issues:
        return {"status": "FAILED", "details": found_issues}
    return {"status": "PASSED", "details": "Code looks clean."}
88
+
89
# --- 4. API Endpoints ---

class ChatRequest(BaseModel):
    """OpenAI-style chat completion request body."""
    # Conversation history; each entry is a dict — the handler below reads the
    # 'content' key of the last one (presumably also has 'role'; confirm with callers).
    messages: List[dict]
    # Accepted for OpenAI compatibility; NOTE(review): not currently
    # forwarded to the model (run_agent hard-codes max_tokens=2000).
    max_tokens: Optional[int] = 512
94
+
95
@app.get("/")
def root():
    """Health-check endpoint confirming the service is up."""
    status_payload = {
        "status": "Running",
        "message": "Autonomous AI is online.",
    }
    return status_payload
98
 
99
@app.post("/v1/chat/completions")
def chat_completions(request: ChatRequest):
    """
    OpenAI Compatible Endpoint used by OpenClaw.

    Pipeline: Architect plans -> Coder implements -> Security audits, then
    the three results are folded into one assistant message.

    Raises:
        HTTPException: 400 when the request carries no messages or the last
            message has no 'content' field (previously an unhandled
            IndexError/KeyError, i.e. a 500).
    """
    # Validate input instead of letting messages[-1]['content'] blow up.
    if not request.messages:
        raise HTTPException(status_code=400, detail="messages must not be empty")
    user_message = request.messages[-1].get('content')
    if user_message is None:
        raise HTTPException(status_code=400, detail="last message has no 'content'")

    # Step 1: Planning
    print(f">>> [Orchestrator] Received task: {user_message}")
    architecture_plan = run_agent(SYSTEM_PROMPT_ARCHITECT, user_message)
    print(">>> [Architect] Plan generated.")

    # Step 2: Coding — the coder sees both the plan and the raw requirement.
    code_output = run_agent(
        SYSTEM_PROMPT_CODER,
        f"Architecture:\n{architecture_plan}\n\nRequirement:\n{user_message}",
    )
    print(">>> [Coder] Code generated.")

    # Step 3: Security Check (local keyword audit, not the LLM security agent)
    audit_result = run_security_audit(code_output)
    print(f">>> [Security] Audit result: {audit_result['status']}")

    # Step 4: Final Formatting
    final_response = (
        f"Project Plan:\n{architecture_plan}\n\n"
        f"Code:\n```python\n{code_output}\n```\n\n"
        f"Security Audit: {audit_result['status']}"
    )

    # Shape the reply like an OpenAI chat.completion object so standard
    # OpenAI clients can parse it.
    return {
        "id": "chatcmpl-001",
        "object": "chat.completion",
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": final_response
            },
            "finish_reason": "stop"
        }]
    }