CrazyMonkey0 committed
Commit 3ad9eac · Parent(s): 245cf59

fix(chat): use llm() directly instead of create_chat_completion

1 file changed: app/routes/nlp.py (+18 -26)

app/routes/nlp.py CHANGED
@@ -1,13 +1,11 @@
 from fastapi import APIRouter, Request, Response
 from pydantic import BaseModel
 from llama_cpp import Llama
-from llama_cpp.llama_chat_format import Qwen25VLChatHandler
 from .tts import save_audio
 import uuid
 
 router = APIRouter()
 
-
 SYSTEM_PROMPT = """You are Emma, a friendly English teacher helping learners improve their English.
 
 Reply naturally to the user's message (2-4 sentences), then if you find errors, add:
@@ -18,57 +16,51 @@ Original: "..."
 Correction: "..."
 Explanation: [one simple sentence]
 
-Analyze only grammar, vocabulary, spelling, and common learner mistakes. Be encouraging!"""
+Analyze only grammar, vocabulary, spelling, and common learner mistakes. Be encouraging!
+"""
 
 class ChatRequest(BaseModel):
     message: str
 
 # Load NLP model
 def load_model_nlp():
-    chat_handler = Qwen25VLChatHandler.from_pretrained(
-        repo_id="Qwen/Qwen2.5-3B-Instruct-GGUF",
-        filename="qwen2.5-3b-instruct-q5_0.gguf",
-    )
-
     llm = Llama.from_pretrained(
         repo_id="Qwen/Qwen2.5-3B-Instruct-GGUF",
         filename="qwen2.5-3b-instruct-q5_0.gguf",
-        chat_handler=chat_handler,
-        n_ctx=2048,
-        verbose=False,  # turn off logging
+        n_ctx=2048,
+        verbose=False
     )
     print("[INFO] NLP model loaded.")
     return llm
 
-
 @router.post("/chat")
 async def chat(request: Request, chat_request: ChatRequest):
     """Endpoint for chat with the NLP model."""
     text = chat_request.message
 
-    #
+    # Get the model from app state
    llm = request.app.state.model_nlp
-
-    #
-    messages = [
-        {"role": "system", "content": SYSTEM_PROMPT},
-        {"role": "user", "content": text},
-    ]
-
-    # Generate response
-    output = llm.create_chat_completion(
-        messages=messages,
+
+    # Build the prompt by hand (multi-turn support could be added here)
+    prompt = f"{SYSTEM_PROMPT}\n\nUser: {text}\nEmma:"
+
+    # Generate a response
+    output = llm(
+        prompt,
         max_tokens=512,
         temperature=0.7,
         top_p=0.9,
         top_k=50,
+        stop=["\nUser:", "\nEmma:"]
     )
-
-    response_text = output["choices"][0]["message"]["content"]
+
+    response_text = output["choices"][0]["text"].strip()
+
+    # Generate audio
    audio_bytes = save_audio(request, response_text)
 
+    # Build the multipart/form-data body
     boundary = uuid.uuid4().hex
-
     body = (
         f"--{boundary}\r\n"
         f"Content-Disposition: form-data; name=\"text\"\r\n\r\n"