Adi362 committed on
Commit
562d032
·
verified ·
1 Parent(s): 2034a63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -10
app.py CHANGED
@@ -1,11 +1,26 @@
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
  from llama_cpp import Llama
 
4
 
5
  app = FastAPI()
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  llm = Llama(
8
- model_path="/models/model.gguf",
9
  n_ctx=2048,
10
  n_threads=2,
11
  n_batch=128,
@@ -17,25 +32,32 @@ class Message(BaseModel):
17
  content: str
18
 
19
class ChatRequest(BaseModel):
    """Request body for POST /v1/chat: the ordered conversation history."""
    # One entry per turn; each Message carries a role and its content.
    messages: list[Message]
21
 
 
22
# NOTE(review): pre-update version of the endpoint, using "<|role|>" prompt
# delimiters (replaced by the plain "role:" format in the newer revision).
@app.post("/v1/chat")
def chat(req: ChatRequest):
    """Generate one assistant reply for the supplied message history."""
    # Transcript format: each turn wrapped in "<|role|>" tags, one per line.
    prompt = "<|system|>\nYou are a helpful, concise chatbot.\n"
    for m in req.messages:
        prompt += f"<|{m.role}|>\n{m.content}\n"
    # Open the assistant tag so the model continues as the assistant.
    prompt += "<|assistant|>\n"

    output = llm(
        prompt,
        max_tokens=256,
        temperature=0.7,
        top_p=0.9,
        # Halt generation if the model starts fabricating another turn.
        stop=["<|user|>", "<|system|>"]
    )

    return {
        "model": "edyx-convo",
        "text": output["choices"][0]["text"].strip(),
        "tokens": output["usage"]["total_tokens"],
    }
 
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
  from llama_cpp import Llama
4
+ from typing import List
5
 
6
  app = FastAPI()
7
 
8
+
9
# System prompt prepended to every request in chat(); it pins the model's
# persona and response-length rules. Runtime text — do not reformat.
SYSTEM_PROMPT = """You are edyx-convo.
You are a concise, neutral, developer-grade assistant.

Rules:
- Answer clearly and directly
- No marketing language
- No emojis
- No self-praise
- Default to 1–3 sentences
- Expand only if explicitly asked
"""
20
+
21
+
22
  llm = Llama(
23
+ model_path="./model.gguf",
24
  n_ctx=2048,
25
  n_threads=2,
26
  n_batch=128,
 
32
  content: str
33
 
34
  class ChatRequest(BaseModel):
35
+ messages: List[Message]
36
 
37
# ---- CHAT ENDPOINT ----
@app.post("/v1/chat")
def chat(req: ChatRequest):
    """Run one chat completion over the supplied message history.

    Builds a plain-text transcript (system prompt, then one
    "role: content" line per turn, roles lowercased), cues the model to
    continue as the assistant, and returns the generated text together
    with the token usage reported by llama.cpp.
    """
    # Assemble the transcript in one pass rather than via repeated +=.
    turns = "".join(
        f"{msg.role.lower()}: {msg.content}\n" for msg in req.messages
    )
    prompt = f"{SYSTEM_PROMPT}\n\n{turns}assistant:"

    # Sampling tuned for short, focused replies; generation halts if the
    # model starts fabricating another turn ("user:" / "assistant:").
    output = llm(
        prompt,
        max_tokens=128,
        temperature=0.4,
        top_p=0.9,
        repeat_penalty=1.15,
        stop=["user:", "assistant:"]
    )

    completion = output["choices"][0]["text"].strip()

    return {
        "model": "edyx-convo",
        "text": completion,
        "tokens": output["usage"]["total_tokens"]
    }