Spaces:
removed token limits from our end
app.py CHANGED
|
@@ -1,27 +1,19 @@
 from fastapi import FastAPI
 from pydantic import BaseModel
 from llama_cpp import Llama
-from typing import List
+from typing import List, Optional
 
 app = FastAPI()
 
 
-SYSTEM_PROMPT = """You are
-You are a
-
-STRICT RULES:
-- Do NOT ask questions unless explicitly requested
-- Do NOT use greetings
-- Do NOT introduce yourself conversationally
-- Do NOT use emojis or marketing language
-- Answer in 1–2 sentences by default
-- Be factual and minimal
+SYSTEM_PROMPT = """You are Edyx.
+You are a helpful, harmless, and honest AI assistant.
 """
 
-
 llm = Llama(
     model_path="/models/model.gguf",
-
+
+    n_ctx=4096,
     n_threads=2,
     n_batch=128,
     verbose=False
@@ -34,23 +26,33 @@ class Message(BaseModel):
 class ChatRequest(BaseModel):
     messages: List[Message]
 
+    max_tokens: Optional[int] = 1024
+    temperature: Optional[float] = 0.7
+    repetition_penalty: Optional[float] = 1.1
+
 @app.post("/v1/chat")
 def chat(req: ChatRequest):
+
     prompt = SYSTEM_PROMPT + "\n\n"
 
     for m in req.messages:
         role = m.role.lower()
-
+        if role == "system":
+
+            prompt = f"{m.content}\n\n"
+        else:
+            prompt += f"{role}: {m.content}\n"
 
     prompt += "assistant:"
 
+
     output = llm(
         prompt,
-        max_tokens=
-        temperature=
+        max_tokens=req.max_tokens,
+        temperature=req.temperature,
         top_p=0.9,
-        repeat_penalty=
-        stop=["user:", "assistant:"]
+        repeat_penalty=req.repetition_penalty,
+        stop=["user:", "assistant:", "<|end|>", "User:"]
     )
 
     text = output["choices"][0]["text"].strip()
@@ -59,4 +61,4 @@ def chat(req: ChatRequest):
         "model": "edyx-convo",
         "text": text,
         "tokens": output["usage"]["total_tokens"]
-    }
+    }
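For reference, a minimal client sketch of how the updated endpoint could be called after this change. The base URL, the use of the requests library, and the concrete parameter values are assumptions; the request fields and response keys come from the handler in the diff above.

# Hypothetical client for the /v1/chat endpoint shown above.
# BASE_URL is a placeholder; the real Space URL is not part of this commit.
import requests

BASE_URL = "http://localhost:7860"  # assumption: wherever the FastAPI app is served

payload = {
    "messages": [
        {"role": "user", "content": "Summarize what n_ctx controls in llama.cpp."}
    ],
    # Sampling limits are now fields on ChatRequest instead of hard-coded llm() arguments;
    # omitting them falls back to the defaults (1024 tokens, temperature 0.7, penalty 1.1).
    "max_tokens": 256,
    "temperature": 0.7,
    "repetition_penalty": 1.1,
}

resp = requests.post(f"{BASE_URL}/v1/chat", json=payload, timeout=120)
resp.raise_for_status()
data = resp.json()

# Response keys taken from the handler: "model", "text", "tokens".
print(data["model"], data["tokens"])
print(data["text"])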