mrmadblack committed on
Commit 11024e6 · verified · 1 Parent(s): f7db6c7

Update server.py

Files changed (1)
  1. server.py +47 -28
server.py CHANGED
@@ -3,58 +3,56 @@ from pydantic import BaseModel
 import subprocess
 import uvicorn
 import os
+import json
 
 app = FastAPI()
 
-# ---------------------------
-# Models
-# ---------------------------
-
 MODELS = {
     "tinyllama": "models/tinyllama.gguf",
     "qwen": "models/qwen1.5b.gguf",
     "gemma": "models/gemma2b.gguf"
 }
 
-# ---------------------------
-# Ensure model folder exists
-# ---------------------------
-
-os.makedirs("models", exist_ok=True)
-
-
-# ---------------------------
-# Request models
-# ---------------------------
-
 class ChatRequest(BaseModel):
     model: str
     messages: list
 
-
 class GenerateRequest(BaseModel):
     model: str
     prompt: str
 
 
+# ---------------------------
+# Utility Logging
+# ---------------------------
+
+def log(title, data):
+    print("\n==============================")
+    print(title)
+    print(data)
+    print("==============================\n")
+
+
 # ---------------------------
 # Prompt builder
 # ---------------------------
 
 def build_prompt(messages):
+
+    log("CHAT HISTORY", json.dumps(messages, indent=2))
+
     prompt = ""
 
     for m in messages:
         role = m.get("role", "user")
         content = m.get("content", "")
 
-        if role == "assistant":
-            prompt += f"assistant: {content}\n"
-        else:
-            prompt += f"user: {content}\n"
+        prompt += f"{role}: {content}\n"
 
     prompt += "assistant:"
 
+    log("FINAL PROMPT", prompt)
+
     return prompt
 
 
@@ -64,24 +62,38 @@ def build_prompt(messages):
 
 def run_model(model_path, prompt):
 
+    log("MODEL PATH", model_path)
+
+    if not os.path.exists(model_path):
+        log("ERROR", f"Model file missing: {model_path}")
+        return "Model file not found"
+
+    command = [
+        "./llama.cpp/build/bin/llama-cli",
+        "-m", model_path,
+        "-p", prompt,
+        "-n", "200",
+        "--no-display-prompt"
+    ]
+
+    log("EXEC COMMAND", command)
+
     result = subprocess.run(
-        [
-            "./llama.cpp/build/bin/llama-cli",
-            "-m", model_path,
-            "-p", prompt,
-            "-n", "200",
-            "--no-display-prompt"
-        ],
+        command,
         capture_output=True,
         text=True
    )
 
+    log("LLAMA STDOUT", result.stdout)
+    log("LLAMA STDERR", result.stderr)
+
     output = result.stdout.strip()
 
-    # Clean assistant prefix
     if "assistant:" in output:
         output = output.split("assistant:")[-1].strip()
 
+    log("FINAL OUTPUT", output)
+
     return output
 
 
@@ -91,6 +103,7 @@ def run_model(model_path, prompt):
 
 @app.get("/")
 def root():
+    log("SERVER STATUS", "Server running")
     return {"status": "running"}
 
 
@@ -109,6 +122,8 @@ def list_models():
             "model": name
         })
 
+    log("MODEL LIST REQUEST", models)
+
     return {"models": models}
 
 
@@ -119,6 +134,8 @@ def list_models():
 @app.post("/api/generate")
 def generate(req: GenerateRequest):
 
+    log("GENERATE REQUEST", req.dict())
+
     if req.model not in MODELS:
         return {"error": "model not found"}
 
@@ -140,6 +157,8 @@ def generate(req: GenerateRequest):
 @app.post("/api/chat")
 def chat(req: ChatRequest):
 
+    log("CHAT REQUEST", req.dict())
+
     if req.model not in MODELS:
         return {"error": "model not found"}
 
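For reference, a minimal client-side sketch of how the updated endpoints could be exercised. The base URL (http://localhost:8000) and the use of the requests package are assumptions, not part of this commit; the request bodies follow the GenerateRequest and ChatRequest models shown in the diff.

# Client-side sketch (assumptions: the server is reachable at
# http://localhost:8000 and the `requests` package is installed;
# neither is specified in this commit).
import requests

BASE_URL = "http://localhost:8000"  # assumed host/port

# /api/generate expects a model name and a raw prompt string
resp = requests.post(
    f"{BASE_URL}/api/generate",
    json={"model": "tinyllama", "prompt": "Hello"},
)
print(resp.json())

# /api/chat expects a model name and a list of role/content messages;
# build_prompt() flattens these into "role: content" lines before
# appending the final "assistant:" cue
resp = requests.post(
    f"{BASE_URL}/api/chat",
    json={
        "model": "tinyllama",
        "messages": [{"role": "user", "content": "Hello"}],
    },
)
print(resp.json())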