CrazyMonkey0 committed on
Commit
3ad9eac
·
1 Parent(s): 245cf59

fix(chat): use llm() directly instead of create_chat_completion

Browse files
Files changed (1) hide show
  1. app/routes/nlp.py +18 -26
app/routes/nlp.py CHANGED
@@ -1,13 +1,11 @@
1
  from fastapi import APIRouter, Request, Response
2
  from pydantic import BaseModel
3
  from llama_cpp import Llama
4
- from llama_cpp.llama_chat_format import Qwen25VLChatHandler
5
  from .tts import save_audio
6
  import uuid
7
 
8
  router = APIRouter()
9
 
10
-
11
  SYSTEM_PROMPT = """You are Emma, a friendly English teacher helping learners improve their English.
12
 
13
  Reply naturally to the user's message (2-4 sentences), then if you find errors, add:
@@ -18,57 +16,51 @@ Original: "..."
18
  Correction: "..."
19
  Explanation: [one simple sentence]
20
 
21
- Analyze only grammar, vocabulary, spelling, and common learner mistakes. Be encouraging!"""
 
22
 
23
  class ChatRequest(BaseModel):
24
  message: str
25
 
26
  # Load NLP model
27
  def load_model_nlp():
28
- chat_handler = Qwen25VLChatHandler.from_pretrained(
29
- repo_id="Qwen/Qwen2.5-3B-Instruct-GGUF",
30
- filename="qwen2.5-3b-instruct-q5_0.gguf",
31
- )
32
-
33
  llm = Llama.from_pretrained(
34
  repo_id="Qwen/Qwen2.5-3B-Instruct-GGUF",
35
  filename="qwen2.5-3b-instruct-q5_0.gguf",
36
- chat_handler=chat_handler,
37
- n_ctx=2048,
38
- verbose=False, # off logging
39
  )
40
  print("[INFO] NLP model loaded.")
41
  return llm
42
 
43
-
44
  @router.post("/chat")
45
  async def chat(request: Request, chat_request: ChatRequest):
46
  """Endpoint for chat with the NLP model."""
47
  text = chat_request.message
48
 
49
- # Download model from app state
50
  llm = request.app.state.model_nlp
51
-
52
- # preparation of messages
53
- messages = [
54
- {"role": "system", "content": SYSTEM_PROMPT},
55
- {"role": "user", "content": text}
56
- ]
57
-
58
- # Generate response
59
- output = llm.create_chat_completion(
60
- messages=messages,
61
  max_tokens=512,
62
  temperature=0.7,
63
  top_p=0.9,
64
  top_k=50,
 
65
  )
66
-
67
- response_text = output["choices"][0]["message"]["content"].strip()
 
 
68
  audio_bytes = save_audio(request, response_text)
69
 
 
70
  boundary = uuid.uuid4().hex
71
-
72
  body = (
73
  f"--{boundary}\r\n"
74
  f"Content-Disposition: form-data; name=\"text\"\r\n\r\n"
 
1
  from fastapi import APIRouter, Request, Response
2
  from pydantic import BaseModel
3
  from llama_cpp import Llama
 
4
  from .tts import save_audio
5
  import uuid
6
 
7
  router = APIRouter()
8
 
 
9
  SYSTEM_PROMPT = """You are Emma, a friendly English teacher helping learners improve their English.
10
 
11
  Reply naturally to the user's message (2-4 sentences), then if you find errors, add:
 
16
  Correction: "..."
17
  Explanation: [one simple sentence]
18
 
19
+ Analyze only grammar, vocabulary, spelling, and common learner mistakes. Be encouraging!
20
+ """
21
 
22
class ChatRequest(BaseModel):
    """Request body for the POST /chat endpoint.

    # NOTE(review): the handler treats `message` as the learner's raw chat
    # text and feeds it straight into the LLM prompt — no length limit or
    # sanitization is applied here; confirm upstream validation if needed.
    """

    # The user's chat message to be answered (and corrected) by the model.
    message: str
24
 
25
# Load NLP model
def load_model_nlp(
    repo_id: str = "Qwen/Qwen2.5-3B-Instruct-GGUF",
    filename: str = "qwen2.5-3b-instruct-q5_0.gguf",
    n_ctx: int = 2048,
):
    """Download (if not cached) and load the NLP chat model from the HF Hub.

    The defaults reproduce the previous hard-coded behavior; callers may
    override them to load a different GGUF build or context size.

    Args:
        repo_id: Hugging Face repository that hosts the GGUF weights.
        filename: Exact GGUF file to fetch from the repository.
        n_ctx: Context-window size (in tokens) for the llama.cpp session.

    Returns:
        A ready-to-use ``llama_cpp.Llama`` instance.
    """
    llm = Llama.from_pretrained(
        repo_id=repo_id,
        filename=filename,
        n_ctx=n_ctx,
        verbose=False,  # silence llama.cpp's load/inference logging
    )
    print("[INFO] NLP model loaded.")
    return llm
35
 
 
36
  @router.post("/chat")
37
  async def chat(request: Request, chat_request: ChatRequest):
38
  """Endpoint for chat with the NLP model."""
39
  text = chat_request.message
40
 
41
+ # Pobierz model z app state
42
  llm = request.app.state.model_nlp
43
+
44
+ # Przygotuj prompt ręcznie (multi-turn można rozszerzyć tu)
45
+ prompt = f"{SYSTEM_PROMPT}\n\nUser: {text}\nEmma:"
46
+
47
+ # Wygeneruj odpowiedź
48
+ output = llm(
49
+ prompt,
 
 
 
50
  max_tokens=512,
51
  temperature=0.7,
52
  top_p=0.9,
53
  top_k=50,
54
+ stop=["\nUser:", "\nEmma:"]
55
  )
56
+
57
+ response_text = output["choices"][0]["text"].strip()
58
+
59
+ # Generuj audio
60
  audio_bytes = save_audio(request, response_text)
61
 
62
+ # Przygotuj multipart/form-data
63
  boundary = uuid.uuid4().hex
 
64
  body = (
65
  f"--{boundary}\r\n"
66
  f"Content-Disposition: form-data; name=\"text\"\r\n\r\n"