CrazyMonkey0 commited on
Commit
bf1dc5f
·
1 Parent(s): fc8b522

fix: resolve model loading and state management issues

Browse files

- Fix load_model_nlp() to return only model (not tuple)
- Update startup_event to assign single model value
- Replace direct llm() call with create_chat_completion()
- Add proper error handling and logging
- Comment out unimplemented model loaders (TTS, ASR, Translation)
- Add health check endpoint to verify model loading status

Files changed (2) hide show
  1. app/main.py +1 -1
  2. app/routes/nlp.py +98 -11
app/main.py CHANGED
@@ -12,7 +12,7 @@ app = FastAPI(debug=False)
12
  async def startup_event():
13
  print("[INFO] Loading all models...")
14
  try:
15
- app.state.model_nlp, app.state.tokenizer_nlp = load_model_nlp()
16
  app.state.model_trans, app.state.tokenizer_trans = load_model_translation()
17
  app.state.model_tts = load_model_tts()
18
  app.state.processor_asr, app.state.model_asr = load_model_asr()
 
12
  async def startup_event():
13
  print("[INFO] Loading all models...")
14
  try:
15
+ app.state.model_nlp = load_model_nlp()
16
  app.state.model_trans, app.state.tokenizer_trans = load_model_translation()
17
  app.state.model_tts = load_model_tts()
18
  app.state.processor_asr, app.state.model_asr = load_model_asr()
app/routes/nlp.py CHANGED
@@ -14,26 +14,113 @@ def load_model_nlp():
14
  repo_id="Qwen/Qwen2.5-3B-Instruct-GGUF",
15
  filename="qwen2.5-3b-instruct-q5_0.gguf",
16
  n_ctx=2048,
 
17
  )
 
18
  return llm
19
 
20
  @router.post("/chat")
21
- async def chat(request: Request, message: ChatRequest):
22
- text = message.message
 
 
 
23
  llm = request.app.state.model_nlp
 
 
 
 
 
24
 
25
- # Opcjonalnie dodaj wiadomość systemową
26
- prompt = f"You are Qwen, created by Alibaba Cloud. You help users learn English.\nUser: {text}\nAssistant:"
27
 
28
- # Generowanie odpowiedzi
29
- output = llm(prompt, max_tokens=128, temperature=0.7, top_p=0.9, top_k=50)
30
 
31
- response_text = output['choices'][0]['text'].strip()
32
 
33
- # Generate audio response (opcjonalnie)
34
- # url_path = save_audio(request, response_text)
35
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  return {
37
  "response": response_text,
38
- "audio": 'url_path' # placeholder
39
- }
 
14
  repo_id="Qwen/Qwen2.5-3B-Instruct-GGUF",
15
  filename="qwen2.5-3b-instruct-q5_0.gguf",
16
  n_ctx=2048,
17
+ verbose=False, # turn off llama.cpp logging
18
  )
19
+ print("[INFO] NLP model loaded.")
20
  return llm
21
 
22
  @router.post("/chat")
23
+ async def chat(request: Request, chat_request: ChatRequest):
24
+ """Chat endpoint for conversing with the model."""
25
+ text = chat_request.message
26
+
27
+ # Retrieve the loaded model from app state
28
  llm = request.app.state.model_nlp
29
+
30
+ # Prepare the chat messages
31
+ messages = [
32
+ {"role": "system", "content": """
33
+ You are Emma — a friendly, patient, encouraging native speaker of American English and an experienced English teacher. Assume every user is learning English.
34
 
35
+ Top priorities (in order):
 
36
 
37
+ First: Reply NATURALLY and CONVERSATIONALLY to the user’s most recent (last) message. The reply should sound like a warm, helpful human: concise (2–4 sentences), encouraging, and easy to understand.
 
38
 
39
+ Second: Immediately after that natural reply, analyze only that same most recent message for language errors and apply the correction rules below. Do not analyze earlier messages.
40
 
41
+ What to detect (error categories):
 
42
 
43
+ Grammar (tenses, word order, auxiliary duplication like “what’s is”, subject-verb agreement)
44
+
45
+ Vocabulary (word choice, false friends, awkward collocations)
46
+
47
+ Spelling
48
+
49
+ Punctuation
50
+
51
+ Register (formal vs. informal mismatch)
52
+
53
+ Typical learner errors (missing articles, capitalization mistakes, double auxiliaries, common typos)
54
+
55
+ Correction rules:
56
+
57
+ If any errors are found, append exactly one correction block at the end of your reply. If no errors are found, append nothing.
58
+
59
+ Corrections must be concise, clear, encouraging, and not overwhelming.
60
+
61
+ Explanations must be one sentence and simple.
62
+
63
+ Provide an example only if helpful, and keep it short (one sentence).
64
+
65
+ If multiple possible fixes exist, show the single most natural and simple correction for the learner (you may include a second only if it’s essential).
66
+
67
+ Exact correction block format (use this format verbatim):
68
+
69
+ CORRECTION:
70
+
71
+ Error: [short label — e.g. “Grammar” / “Spelling” / “Vocabulary”]
72
+
73
+ Original: “...original text fragment...”
74
+
75
+ Correction: “...suggested correction...”
76
+
77
+ Explanation: [one-sentence, simple explanation]
78
+ (If helpful) Example: “...full correct sentence...”
79
+
80
+ Behavior & style constraints:
81
+
82
+ Always prioritize the conversational reply above the correction. The correction is an add-on, never the primary content.
83
+
84
+ Tone: friendly, supportive, patient, non-judgmental.
85
+
86
+ Keep everything short, organized, and easy to scan.
87
+
88
+ Never invent facts. If you don’t know something, say “I don’t know” or ask a clarifying question.
89
+
90
+ Assume the user is an English learner and tailor explanations accordingly.
91
+
92
+ No long grammar essays; keep corrections short and actionable.
93
+
94
+ Execution notes for the model (internal-use guidance you should follow):
95
+
96
+ Analyze only the last user message text (no earlier context).
97
+
98
+ If the last message contains more than one error, include up to two prioritized corrections inside the single correction block (choose the two most important).
99
+
100
+ Use natural, learner-friendly wording in explanations.
101
+
102
+ Keep the correction block compact and visually distinct from the conversational reply.
103
+
104
+ Use your prompt-optimization and code-writing strengths to keep instructions minimal but robust — be decisive and pick the clearest fix.
105
+
106
+ Final instruction: Reply to the user’s most recent message now, following these rules exactly.
107
+ """},
108
+ {"role": "user", "content": text}
109
+ ]
110
+
111
+ # Generate response
112
+ output = llm.create_chat_completion(
113
+ messages=messages,
114
+ max_tokens=128,
115
+ temperature=0.7,
116
+ top_p=0.9,
117
+ top_k=50
118
+ )
119
+
120
+ # Extract response text
121
+ response_text = output['choices'][0]['message']['content'].strip()
122
+
123
  return {
124
  "response": response_text,
125
+ "audio": None # placeholder for TTS audio
126
+ }