CrazyMonkey0 commited on
Commit
df63d34
·
1 Parent(s): 0bdf4f1

feat(nlp): switch NLP model to Qwen2.5-0.5B-Instruct

Browse files
Files changed (3) hide show
  1. README.md +6 -4
  2. app/main.py +1 -1
  3. app/routes/nlp.py +42 -25
README.md CHANGED
@@ -9,7 +9,7 @@ app_file: "app/main.py"
9
  app_port: 7860
10
  short_description: "English learning API"
11
  models:
12
- - Qwen/Qwen2.5-1.5B-Instruct-GGUF
13
  - openai/whisper-small.en
14
  - facebook/mms-tts-eng
15
  - allegro/BiDi-eng-pol
@@ -53,7 +53,7 @@ Each model retains its original license as listed below:
53
  Developed by [**AI at Meta**](https://ai.facebook.com/).
54
 
55
  ### 💬 Natural Language Processing (Chat & Grammar)
56
- - [**Qwen2.5-1.5B-Instruct-GGUF**](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF)
57
  Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
58
  Developed by [**Qwen Team**](https://qwen.ai/).
59
 
@@ -93,7 +93,7 @@ The source code of this application is distributed separately under the license
93
  year={2023}
94
  }
95
 
96
- ### 3. Qwen2.5-1.5B-Instruct-GGUF — Qwen Team
97
  @misc{qwen2.5,
98
  title = {Qwen2.5: A Party of Foundation Models},
99
  url = {https://qwenlm.github.io/blog/qwen2.5/},
@@ -101,6 +101,7 @@ The source code of this application is distributed separately under the license
101
  month = {September},
102
  year = {2024}
103
  }
 
104
  @article{qwen2,
105
  title={Qwen2 Technical Report},
106
  author={An Yang and Baosong Yang and Binyuan Hui and Bo Zheng and Bowen Yu and Chang Zhou and Chengpeng Li and Chengyuan Li and Dayiheng Liu and Fei Huang and Guanting Dong and Haoran Wei and Huan Lin and Jialong Tang and Jialin Wang and Jian Yang and Jianhong Tu and Jianwei Zhang and Jianxin Ma and Jin Xu and Jingren Zhou and Jinze Bai and Jinzheng He and Junyang Lin and Kai Dang and Keming Lu and Keqin Chen and Kexin Yang and Mei Li and Mingfeng Xue and Na Ni and Pei Zhang and Peng Wang and Ru Peng and Rui Men and Ruize Gao and Runji Lin and Shijie Wang and Shuai Bai and Sinan Tan and Tianhang Zhu and Tianhao Li and Tianyu Liu and Wenbin Ge and Xiaodong Deng and Xiaohuan Zhou and Xingzhang Ren and Xinyu Zhang and Xipin Wei and Xuancheng Ren and Yang Fan and Yang Yao and Yichang Zhang and Yu Wan and Yunfei Chu and Yuqiong Liu and Zeyu Cui and Zhenru Zhang and Zhihao Fan},
@@ -109,6 +110,7 @@ The source code of this application is distributed separately under the license
109
  }
110
 
111
 
 
112
  ### 4. Allegro/BiDi-eng-pol — Allegro ML Research
113
  Authors:
114
  - MLR @ Allegro: [Artur Kot](https://linkedin.com/in/arturkot), [Mikołaj Koszowski](https://linkedin.com/in/mkoszowski), [Wojciech Chojnowski](https://linkedin.com/in/wojciech-chojnowski-744702348), [Mieszko Rutkowski](https://linkedin.com/in/mieszko-rutkowski)
@@ -122,7 +124,7 @@ Special thanks to the teams and organizations that created and maintain the foll
122
 
123
  - **[OpenAI](https://openai.com/)** for [**Whisper Small (English)**](https://huggingface.co/openai/whisper-small.en) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
124
  - **[Facebook AI Research (FAIR)](https://ai.facebook.com/)** for [**facebook/mms-tts-eng**](https://huggingface.co/facebook/mms-tts-eng) — Licensed under [Creative Commons Attribution Non Commercial 4.0 (CC BY-NC 4.0)](https://creativecommons.org/licenses/by-nc/4.0/).
125
- - **[Qwen Team](https://qwenlm.github.io/blog/qwen2.5/)** for [**Qwen2.5-1.5B-Instruct-GGUF**](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
126
  - **[Allegro ML Research](https://ml.allegro.tech/)** for [**BiDi-eng-pol**](https://huggingface.co/allegro/BiDi-eng-pol) — Licensed under [Creative Commons Attribution 4.0 International (CC BY 4.0)](https://creativecommons.org/licenses/by/4.0/).
127
 
128
  This application uses these models for educational and research purposes only, in full compliance with their respective licenses.
 
9
  app_port: 7860
10
  short_description: "English learning API"
11
  models:
12
+ - Qwen/Qwen2.5-0.5B-Instruct
13
  - openai/whisper-small.en
14
  - facebook/mms-tts-eng
15
  - allegro/BiDi-eng-pol
 
53
  Developed by [**AI at Meta**](https://ai.facebook.com/).
54
 
55
  ### 💬 Natural Language Processing (Chat & Grammar)
56
+ - [**Qwen/Qwen2.5-0.5B-Instruct**](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct)
57
  Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
58
  Developed by [**Qwen Team**](https://qwen.ai/).
59
 
 
93
  year={2023}
94
  }
95
 
96
+ ### 3. Qwen/Qwen2.5-0.5B-Instruct — Qwen Team
97
  @misc{qwen2.5,
98
  title = {Qwen2.5: A Party of Foundation Models},
99
  url = {https://qwenlm.github.io/blog/qwen2.5/},
 
101
  month = {September},
102
  year = {2024}
103
  }
104
+
105
  @article{qwen2,
106
  title={Qwen2 Technical Report},
107
  author={An Yang and Baosong Yang and Binyuan Hui and Bo Zheng and Bowen Yu and Chang Zhou and Chengpeng Li and Chengyuan Li and Dayiheng Liu and Fei Huang and Guanting Dong and Haoran Wei and Huan Lin and Jialong Tang and Jialin Wang and Jian Yang and Jianhong Tu and Jianwei Zhang and Jianxin Ma and Jin Xu and Jingren Zhou and Jinze Bai and Jinzheng He and Junyang Lin and Kai Dang and Keming Lu and Keqin Chen and Kexin Yang and Mei Li and Mingfeng Xue and Na Ni and Pei Zhang and Peng Wang and Ru Peng and Rui Men and Ruize Gao and Runji Lin and Shijie Wang and Shuai Bai and Sinan Tan and Tianhang Zhu and Tianhao Li and Tianyu Liu and Wenbin Ge and Xiaodong Deng and Xiaohuan Zhou and Xingzhang Ren and Xinyu Zhang and Xipin Wei and Xuancheng Ren and Yang Fan and Yang Yao and Yichang Zhang and Yu Wan and Yunfei Chu and Yuqiong Liu and Zeyu Cui and Zhenru Zhang and Zhihao Fan},
 
110
  }
111
 
112
 
113
+
114
  ### 4. Allegro/BiDi-eng-pol — Allegro ML Research
115
  Authors:
116
  - MLR @ Allegro: [Artur Kot](https://linkedin.com/in/arturkot), [Mikołaj Koszowski](https://linkedin.com/in/mkoszowski), [Wojciech Chojnowski](https://linkedin.com/in/wojciech-chojnowski-744702348), [Mieszko Rutkowski](https://linkedin.com/in/mieszko-rutkowski)
 
124
 
125
  - **[OpenAI](https://openai.com/)** for [**Whisper Small (English)**](https://huggingface.co/openai/whisper-small.en) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
126
  - **[Facebook AI Research (FAIR)](https://ai.facebook.com/)** for [**facebook/mms-tts-eng**](https://huggingface.co/facebook/mms-tts-eng) — Licensed under [Creative Commons Attribution Non Commercial 4.0 (CC BY-NC 4.0)](https://creativecommons.org/licenses/by-nc/4.0/).
127
+ - **[Qwen Team](https://qwen.ai/)** for [**Qwen/Qwen2.5-0.5B-Instruct**](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
128
  - **[Allegro ML Research](https://ml.allegro.tech/)** for [**BiDi-eng-pol**](https://huggingface.co/allegro/BiDi-eng-pol) — Licensed under [Creative Commons Attribution 4.0 International (CC BY 4.0)](https://creativecommons.org/licenses/by/4.0/).
129
 
130
  This application uses these models for educational and research purposes only, in full compliance with their respective licenses.
app/main.py CHANGED
@@ -12,7 +12,7 @@ app = FastAPI(debug=False)
12
  async def startup_event():
13
  print("[INFO] Loading all models...")
14
  try:
15
- app.state.model_nlp = load_model_nlp()
16
  app.state.model_trans, app.state.tokenizer_trans = load_model_translation()
17
  app.state.model_tts, app.state.tokenizer_tts = load_model_tts()
18
  app.state.processor_asr, app.state.model_asr = load_model_asr()
 
12
  async def startup_event():
13
  print("[INFO] Loading all models...")
14
  try:
15
+ app.state.model_nlp, app.state.tokenizer_nlp = load_model_nlp()
16
  app.state.model_trans, app.state.tokenizer_trans = load_model_translation()
17
  app.state.model_tts, app.state.tokenizer_tts = load_model_tts()
18
  app.state.processor_asr, app.state.model_asr = load_model_asr()
app/routes/nlp.py CHANGED
@@ -1,6 +1,7 @@
1
  from fastapi import APIRouter, Request, Response
 
 
2
  from pydantic import BaseModel
3
- from llama_cpp import Llama
4
  from .tts import save_audio
5
  import uuid
6
  import os
@@ -19,36 +20,52 @@ class ChatRequest(BaseModel):
19
  # Load NLP model
20
  def load_model_nlp():
21
 
22
- llm = Llama.from_pretrained(
23
- repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF",
24
- filename="qwen2.5-1.5b-instruct-q3_k_m.gguf",
25
- use_mmap=True,
26
- verbose=True,
 
 
27
  )
28
- return llm
 
 
29
 
30
  @router.post("/chat")
31
  async def chat(request: Request, chat_request: ChatRequest):
32
- """Endpoint for chat with the NLP model."""
33
  text = chat_request.message
34
-
35
- # Fetch the loaded NLP model from app state
36
- llm = request.app.state.model_nlp
37
-
38
- # Create the prompt
39
- prompt = f"{SYSTEM_PROMPT}\n\nUser: {text}\nEmma:"
40
-
41
- # Generate response from the model
42
- output = llm(
43
- prompt,
44
- max_tokens=128,
45
- temperature=0.7,
46
- top_p=0.9,
47
- top_k=50,
48
- stop=["\nUser:", "\nEmma:"]
49
- )
50
 
51
- response_text = output["choices"][0]["text"].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  return {"response": response_text}
53
 
54
  # # Generate audio using TTS
 
1
  from fastapi import APIRouter, Request, Response
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
  from pydantic import BaseModel
 
5
  from .tts import save_audio
6
  import uuid
7
  import os
 
20
  # Load NLP model
21
  def load_model_nlp():
22
 
23
+ model_id = "Qwen/Qwen2.5-0.5B-Instruct"
24
+
25
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
26
+ model = AutoModelForCausalLM.from_pretrained(
27
+ model_id,
28
+ torch_dtype=torch.float32, # CPU friendly
29
+ device_map="cpu"
30
  )
31
+
32
+ model.eval()
33
+ return model, tokenizer
34
 
35
  @router.post("/chat")
36
  async def chat(request: Request, chat_request: ChatRequest):
 
37
  text = chat_request.message
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
+ model = request.app.state.model_nlp
40
+ tokenizer = request.app.state.tokenizer_nlp
41
+
42
+ messages = [
43
+ {"role": "system", "content": SYSTEM_PROMPT},
44
+ {"role": "user", "content": text},
45
+ ]
46
+
47
+ inputs = tokenizer.apply_chat_template(
48
+ messages,
49
+ add_generation_prompt=True,
50
+ tokenize=True,
51
+ return_dict=True,
52
+ return_tensors="pt",
53
+ ).to(model.device)
54
+
55
+ with torch.no_grad():
56
+ output = model.generate(
57
+ **inputs,
58
+ max_new_tokens=150,
59
+ temperature=0.7,
60
+ top_p=0.9,
61
+ do_sample=True,
62
+ )
63
+
64
+ response_text = tokenizer.decode(
65
+ output[0][inputs["input_ids"].shape[-1]:],
66
+ skip_special_tokens=True
67
+ ).strip()
68
+
69
  return {"response": response_text}
70
 
71
  # # Generate audio using TTS