mrmadblack committed
Commit f7db6c7 · verified · 1 Parent(s): ccd21fa

Update server.py

Files changed (1):
  1. server.py +108 -56
server.py CHANGED
@@ -2,55 +2,114 @@ from fastapi import FastAPI
 from pydantic import BaseModel
 import subprocess
 import uvicorn
+import os

 app = FastAPI()

-MODELS={
-    "tinyllama":"models/tinyllama.gguf",
-    "qwen":"models/qwen1.5b.gguf",
-    "gemma":"models/gemma2b.gguf"
+# ---------------------------
+# Models
+# ---------------------------
+
+MODELS = {
+    "tinyllama": "models/tinyllama.gguf",
+    "qwen": "models/qwen1.5b.gguf",
+    "gemma": "models/gemma2b.gguf"
 }

+# ---------------------------
+# Ensure model folder exists
+# ---------------------------
+
+os.makedirs("models", exist_ok=True)
+
+
+# ---------------------------
+# Request models
+# ---------------------------
+
 class ChatRequest(BaseModel):
-    model:str
-    messages:list
+    model: str
+    messages: list
+

 class GenerateRequest(BaseModel):
-    model:str
-    prompt:str
+    model: str
+    prompt: str
+
+
+# ---------------------------
+# Prompt builder
+# ---------------------------

 def build_prompt(messages):
-    prompt=""
+    prompt = ""
+
     for m in messages:
-        role=m["role"]
-        content=m["content"]
-        prompt += f"{role}: {content}\n"
-    prompt+="assistant:"
+        role = m.get("role", "user")
+        content = m.get("content", "")
+
+        if role == "assistant":
+            prompt += f"assistant: {content}\n"
+        else:
+            prompt += f"user: {content}\n"
+
+    prompt += "assistant:"
+
     return prompt


 # ---------------------------
-# Root
+# Run llama.cpp
+# ---------------------------
+
+def run_model(model_path, prompt):
+
+    result = subprocess.run(
+        [
+            "./llama.cpp/build/bin/llama-cli",
+            "-m", model_path,
+            "-p", prompt,
+            "-n", "200",
+            "--no-display-prompt"
+        ],
+        capture_output=True,
+        text=True
+    )
+
+    output = result.stdout.strip()
+
+    # Clean assistant prefix
+    if "assistant:" in output:
+        output = output.split("assistant:")[-1].strip()
+
+    return output
+
+
+# ---------------------------
+# Root endpoint
 # ---------------------------

 @app.get("/")
 def root():
-    return {"status":"running"}
+    return {"status": "running"}


 # ---------------------------
-# Model list (Ollama /api/tags)
+# Model list (Ollama compatible)
 # ---------------------------

 @app.get("/api/tags")
 def list_models():
-    return {
-        "models":[
-            {"name":"tinyllama"},
-            {"name":"qwen"},
-            {"name":"gemma"}
-        ]
-    }
+
+    models = []
+
+    for name in MODELS.keys():
+        models.append({
+            "name": name,
+            "model": name
+        })
+
+    return {"models": models}


 # ---------------------------
@@ -58,25 +117,19 @@ def list_models():
 # ---------------------------

 @app.post("/api/generate")
-def generate(req:GenerateRequest):
+def generate(req: GenerateRequest):

-    model_path=MODELS[req.model]
+    if req.model not in MODELS:
+        return {"error": "model not found"}

-    result=subprocess.run(
-        [
-            "./llama.cpp/build/bin/llama-cli",
-            "-m",model_path,
-            "-p",req.prompt,
-            "-n","200"
-        ],
-        capture_output=True,
-        text=True
-    )
+    model_path = MODELS[req.model]
+
+    response = run_model(model_path, req.prompt)

     return {
-        "model":req.model,
-        "response":result.stdout,
-        "done":True
+        "model": req.model,
+        "response": response,
+        "done": True
     }


@@ -85,31 +138,30 @@ def generate(req:GenerateRequest):
 # ---------------------------

 @app.post("/api/chat")
-def chat(req:ChatRequest):
+def chat(req: ChatRequest):

-    model_path=MODELS[req.model]
-    prompt=build_prompt(req.messages)
+    if req.model not in MODELS:
+        return {"error": "model not found"}

-    result=subprocess.run(
-        [
-            "./llama.cpp/build/bin/llama-cli",
-            "-m",model_path,
-            "-p",prompt,
-            "-n","200"
-        ],
-        capture_output=True,
-        text=True
-    )
+    model_path = MODELS[req.model]
+
+    prompt = build_prompt(req.messages)
+
+    response = run_model(model_path, prompt)

     return {
-        "model":req.model,
-        "message":{
-            "role":"assistant",
-            "content":result.stdout
+        "model": req.model,
+        "message": {
+            "role": "assistant",
+            "content": response
         },
-        "done":True
+        "done": True
     }


+# ---------------------------
+# Start server
+# ---------------------------
+
 if __name__ == "__main__":
     uvicorn.run(app, host="0.0.0.0", port=7860)
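
For reference, the new build_prompt() flattens the chat history into a plain role-prefixed transcript that ends with an open assistant turn for llama-cli to complete. A hypothetical input and the string it would produce (the message contents here are made up for illustration):

messages = [
    {"role": "user", "content": "Hello"},
    {"role": "assistant", "content": "Hi there"},
    {"role": "user", "content": "What is 2 + 2?"},
]

# build_prompt(messages) returns:
#
#   user: Hello
#   assistant: Hi there
#   user: What is 2 + 2?
#   assistant: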
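
A minimal client sketch for exercising the endpoints after this change. It assumes the server is running locally on port 7860 (as in the __main__ block), that the requests package is installed, and that the tinyllama model file actually exists under models/; adjust the base URL and model name to your deployment.

import requests

BASE_URL = "http://localhost:7860"  # assumed local deployment

# List available models (Ollama-style /api/tags)
print(requests.get(f"{BASE_URL}/api/tags").json())

# One-shot completion via /api/generate
gen = requests.post(f"{BASE_URL}/api/generate", json={
    "model": "tinyllama",
    "prompt": "Write one sentence about FastAPI."
})
print(gen.json().get("response"))

# Chat-style request via /api/chat
chat = requests.post(f"{BASE_URL}/api/chat", json={
    "model": "tinyllama",
    "messages": [{"role": "user", "content": "Hello, who are you?"}]
})
print(chat.json().get("message", {}).get("content"))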