Spaces:
Sleeping
Sleeping
CrazyMonkey0
committed on
Commit
·
df63d34
1
Parent(s):
0bdf4f1
feat(nlp): switch NLP model to Qwen2.5-0.5B-Instruct
Browse files
- README.md +6 -4
- app/main.py +1 -1
- app/routes/nlp.py +42 -25
README.md
CHANGED
|
@@ -9,7 +9,7 @@ app_file: "app/main.py"
|
|
| 9 |
app_port: 7860
|
| 10 |
short_description: "English learning API"
|
| 11 |
models:
|
| 12 |
-
- Qwen/Qwen2.5-
|
| 13 |
- openai/whisper-small.en
|
| 14 |
- facebook/mms-tts-eng
|
| 15 |
- allegro/BiDi-eng-pol
|
|
@@ -53,7 +53,7 @@ Each model retains its original license as listed below:
|
|
| 53 |
Developed by [**AI at Meta**](https://ai.facebook.com/).
|
| 54 |
|
| 55 |
### 💬 Natural Language Processing (Chat & Grammar)
|
| 56 |
-
- [**Qwen2.5-
|
| 57 |
Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
|
| 58 |
Developed by [**Qwen Team**](https://qwen.ai/)
|
| 59 |
|
|
@@ -93,7 +93,7 @@ The source code of this application is distributed separately under the license
|
|
| 93 |
year={2023}
|
| 94 |
}
|
| 95 |
|
| 96 |
-
### 3. Qwen2.5-
|
| 97 |
@misc{qwen2.5,
|
| 98 |
title = {Qwen2.5: A Party of Foundation Models},
|
| 99 |
url = {https://qwenlm.github.io/blog/qwen2.5/},
|
|
@@ -101,6 +101,7 @@ The source code of this application is distributed separately under the license
|
|
| 101 |
month = {September},
|
| 102 |
year = {2024}
|
| 103 |
}
|
|
|
|
| 104 |
@article{qwen2,
|
| 105 |
title={Qwen2 Technical Report},
|
| 106 |
author={An Yang and Baosong Yang and Binyuan Hui and Bo Zheng and Bowen Yu and Chang Zhou and Chengpeng Li and Chengyuan Li and Dayiheng Liu and Fei Huang and Guanting Dong and Haoran Wei and Huan Lin and Jialong Tang and Jialin Wang and Jian Yang and Jianhong Tu and Jianwei Zhang and Jianxin Ma and Jin Xu and Jingren Zhou and Jinze Bai and Jinzheng He and Junyang Lin and Kai Dang and Keming Lu and Keqin Chen and Kexin Yang and Mei Li and Mingfeng Xue and Na Ni and Pei Zhang and Peng Wang and Ru Peng and Rui Men and Ruize Gao and Runji Lin and Shijie Wang and Shuai Bai and Sinan Tan and Tianhang Zhu and Tianhao Li and Tianyu Liu and Wenbin Ge and Xiaodong Deng and Xiaohuan Zhou and Xingzhang Ren and Xinyu Zhang and Xipin Wei and Xuancheng Ren and Yang Fan and Yang Yao and Yichang Zhang and Yu Wan and Yunfei Chu and Yuqiong Liu and Zeyu Cui and Zhenru Zhang and Zhihao Fan},
|
|
@@ -109,6 +110,7 @@ The source code of this application is distributed separately under the license
|
|
| 109 |
}
|
| 110 |
|
| 111 |
|
|
|
|
| 112 |
### 4. Allegro/BiDi-eng-pol — Allegro ML Research
|
| 113 |
Authors:
|
| 114 |
- MLR @ Allegro: [Artur Kot](https://linkedin.com/in/arturkot), [Mikołaj Koszowski](https://linkedin.com/in/mkoszowski), [Wojciech Chojnowski](https://linkedin.com/in/wojciech-chojnowski-744702348), [Mieszko Rutkowski](https://linkedin.com/in/mieszko-rutkowski)
|
|
@@ -122,7 +124,7 @@ Special thanks to the teams and organizations that created and maintain the foll
|
|
| 122 |
|
| 123 |
- **[OpenAI](https://openai.com/)** for [**Whisper Small (English)**](https://huggingface.co/openai/whisper-small.en) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
|
| 124 |
- **[Facebook AI Research (FAIR)](https://ai.facebook.com/)** for [**facebook/mms-tts-eng**](https://huggingface.co/facebook/mms-tts-eng) — Licensed under [Creative Commons Attribution Non Commercial 4.0 (CC BY-NC 4.0)](https://creativecommons.org/licenses/by-nc/4.0/).
|
| 125 |
-
- **[Qwen Team](https://
|
| 126 |
- **[Allegro ML Research](https://ml.allegro.tech/)** for [**BiDi-eng-pol**](https://huggingface.co/allegro/BiDi-eng-pol) — Licensed under [Creative Commons Attribution 4.0 International (CC BY 4.0)](https://creativecommons.org/licenses/by/4.0/).
|
| 127 |
|
| 128 |
This application uses these models for educational and research purposes only, in full compliance with their respective licenses.
|
|
|
|
| 9 |
app_port: 7860
|
| 10 |
short_description: "English learning API"
|
| 11 |
models:
|
| 12 |
+
- Qwen/Qwen2.5-0.5B-Instruct
|
| 13 |
- openai/whisper-small.en
|
| 14 |
- facebook/mms-tts-eng
|
| 15 |
- allegro/BiDi-eng-pol
|
|
|
|
| 53 |
Developed by [**AI at Meta**](https://ai.facebook.com/).
|
| 54 |
|
| 55 |
### 💬 Natural Language Processing (Chat & Grammar)
|
| 56 |
+
- [**Qwen/Qwen2.5-0.5B-Instruct**](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct)
|
| 57 |
Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
|
| 58 |
Developed by [**Qwen Team**](https://qwen.ai/)
|
| 59 |
|
|
|
|
| 93 |
year={2023}
|
| 94 |
}
|
| 95 |
|
| 96 |
+
### 3. Qwen/Qwen2.5-0.5B-Instruct — Qwen Team
|
| 97 |
@misc{qwen2.5,
|
| 98 |
title = {Qwen2.5: A Party of Foundation Models},
|
| 99 |
url = {https://qwenlm.github.io/blog/qwen2.5/},
|
|
|
|
| 101 |
month = {September},
|
| 102 |
year = {2024}
|
| 103 |
}
|
| 104 |
+
|
| 105 |
@article{qwen2,
|
| 106 |
title={Qwen2 Technical Report},
|
| 107 |
author={An Yang and Baosong Yang and Binyuan Hui and Bo Zheng and Bowen Yu and Chang Zhou and Chengpeng Li and Chengyuan Li and Dayiheng Liu and Fei Huang and Guanting Dong and Haoran Wei and Huan Lin and Jialong Tang and Jialin Wang and Jian Yang and Jianhong Tu and Jianwei Zhang and Jianxin Ma and Jin Xu and Jingren Zhou and Jinze Bai and Jinzheng He and Junyang Lin and Kai Dang and Keming Lu and Keqin Chen and Kexin Yang and Mei Li and Mingfeng Xue and Na Ni and Pei Zhang and Peng Wang and Ru Peng and Rui Men and Ruize Gao and Runji Lin and Shijie Wang and Shuai Bai and Sinan Tan and Tianhang Zhu and Tianhao Li and Tianyu Liu and Wenbin Ge and Xiaodong Deng and Xiaohuan Zhou and Xingzhang Ren and Xinyu Zhang and Xipin Wei and Xuancheng Ren and Yang Fan and Yang Yao and Yichang Zhang and Yu Wan and Yunfei Chu and Yuqiong Liu and Zeyu Cui and Zhenru Zhang and Zhihao Fan},
|
|
|
|
| 110 |
}
|
| 111 |
|
| 112 |
|
| 113 |
+
|
| 114 |
### 4. Allegro/BiDi-eng-pol — Allegro ML Research
|
| 115 |
Authors:
|
| 116 |
- MLR @ Allegro: [Artur Kot](https://linkedin.com/in/arturkot), [Mikołaj Koszowski](https://linkedin.com/in/mkoszowski), [Wojciech Chojnowski](https://linkedin.com/in/wojciech-chojnowski-744702348), [Mieszko Rutkowski](https://linkedin.com/in/mieszko-rutkowski)
|
|
|
|
| 124 |
|
| 125 |
- **[OpenAI](https://openai.com/)** for [**Whisper Small (English)**](https://huggingface.co/openai/whisper-small.en) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
|
| 126 |
- **[Facebook AI Research (FAIR)](https://ai.facebook.com/)** for [**facebook/mms-tts-eng**](https://huggingface.co/facebook/mms-tts-eng) — Licensed under [Creative Commons Attribution Non Commercial 4.0 (CC BY-NC 4.0)](https://creativecommons.org/licenses/by-nc/4.0/).
|
| 127 |
+
- **[Qwen Team](https://qwen.ai/)** for [**Qwen/Qwen2.5-0.5B-Instruct**](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
|
| 128 |
- **[Allegro ML Research](https://ml.allegro.tech/)** for [**BiDi-eng-pol**](https://huggingface.co/allegro/BiDi-eng-pol) — Licensed under [Creative Commons Attribution 4.0 International (CC BY 4.0)](https://creativecommons.org/licenses/by/4.0/).
|
| 129 |
|
| 130 |
This application uses these models for educational and research purposes only, in full compliance with their respective licenses.
|
app/main.py
CHANGED
|
@@ -12,7 +12,7 @@ app = FastAPI(debug=False)
|
|
| 12 |
async def startup_event():
|
| 13 |
print("[INFO] Loading all models...")
|
| 14 |
try:
|
| 15 |
-
app.state.model_nlp = load_model_nlp()
|
| 16 |
app.state.model_trans, app.state.tokenizer_trans = load_model_translation()
|
| 17 |
app.state.model_tts, app.state.tokenizer_tts = load_model_tts()
|
| 18 |
app.state.processor_asr, app.state.model_asr = load_model_asr()
|
|
|
|
| 12 |
async def startup_event():
|
| 13 |
print("[INFO] Loading all models...")
|
| 14 |
try:
|
| 15 |
+
app.state.model_nlp, app.state.tokenizer_nlp = load_model_nlp()
|
| 16 |
app.state.model_trans, app.state.tokenizer_trans = load_model_translation()
|
| 17 |
app.state.model_tts, app.state.tokenizer_tts = load_model_tts()
|
| 18 |
app.state.processor_asr, app.state.model_asr = load_model_asr()
|
app/routes/nlp.py
CHANGED
|
@@ -1,6 +1,7 @@
|
|
| 1 |
from fastapi import APIRouter, Request, Response
|
|
|
|
|
|
|
| 2 |
from pydantic import BaseModel
|
| 3 |
-
from llama_cpp import Llama
|
| 4 |
from .tts import save_audio
|
| 5 |
import uuid
|
| 6 |
import os
|
|
@@ -19,36 +20,52 @@ class ChatRequest(BaseModel):
|
|
| 19 |
# Load NLP model
|
| 20 |
def load_model_nlp():
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
|
|
|
|
|
|
| 27 |
)
|
| 28 |
-
|
|
|
|
|
|
|
| 29 |
|
| 30 |
@router.post("/chat")
|
| 31 |
async def chat(request: Request, chat_request: ChatRequest):
|
| 32 |
-
"""Endpoint for chat with the NLP model."""
|
| 33 |
text = chat_request.message
|
| 34 |
-
|
| 35 |
-
# Download the loaded NLP model
|
| 36 |
-
llm = request.app.state.model_nlp
|
| 37 |
-
|
| 38 |
-
# Create the prompt
|
| 39 |
-
prompt = f"{SYSTEM_PROMPT}\n\nUser: {text}\nEmma:"
|
| 40 |
-
|
| 41 |
-
# Generate response from the model
|
| 42 |
-
output = llm(
|
| 43 |
-
prompt,
|
| 44 |
-
max_tokens=128,
|
| 45 |
-
temperature=0.7,
|
| 46 |
-
top_p=0.9,
|
| 47 |
-
top_k=50,
|
| 48 |
-
stop=["\nUser:", "\nEmma:"]
|
| 49 |
-
)
|
| 50 |
|
| 51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
return {"response": response_text}
|
| 53 |
|
| 54 |
# # Generate audio using TTS
|
|
|
|
| 1 |
from fastapi import APIRouter, Request, Response
|
| 2 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 3 |
+
import torch
|
| 4 |
from pydantic import BaseModel
|
|
|
|
| 5 |
from .tts import save_audio
|
| 6 |
import uuid
|
| 7 |
import os
|
|
|
|
| 20 |
# Load NLP model
|
| 21 |
def load_model_nlp():
|
| 22 |
|
| 23 |
+
model_id = "Qwen/Qwen2.5-0.5B-Instruct"
|
| 24 |
+
|
| 25 |
+
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
| 26 |
+
model = AutoModelForCausalLM.from_pretrained(
|
| 27 |
+
model_id,
|
| 28 |
+
torch_dtype=torch.float32, # CPU friendly
|
| 29 |
+
device_map="cpu"
|
| 30 |
)
|
| 31 |
+
|
| 32 |
+
model.eval()
|
| 33 |
+
return model, tokenizer
|
| 34 |
|
| 35 |
@router.post("/chat")
|
| 36 |
async def chat(request: Request, chat_request: ChatRequest):
|
|
|
|
| 37 |
text = chat_request.message
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
|
| 39 |
+
model = request.app.state.model_nlp
|
| 40 |
+
tokenizer = request.app.state.tokenizer_nlp
|
| 41 |
+
|
| 42 |
+
messages = [
|
| 43 |
+
{"role": "system", "content": SYSTEM_PROMPT},
|
| 44 |
+
{"role": "user", "content": text},
|
| 45 |
+
]
|
| 46 |
+
|
| 47 |
+
inputs = tokenizer.apply_chat_template(
|
| 48 |
+
messages,
|
| 49 |
+
add_generation_prompt=True,
|
| 50 |
+
tokenize=True,
|
| 51 |
+
return_dict=True,
|
| 52 |
+
return_tensors="pt",
|
| 53 |
+
).to(model.device)
|
| 54 |
+
|
| 55 |
+
with torch.no_grad():
|
| 56 |
+
output = model.generate(
|
| 57 |
+
**inputs,
|
| 58 |
+
max_new_tokens=150,
|
| 59 |
+
temperature=0.7,
|
| 60 |
+
top_p=0.9,
|
| 61 |
+
do_sample=True,
|
| 62 |
+
)
|
| 63 |
+
|
| 64 |
+
response_text = tokenizer.decode(
|
| 65 |
+
output[0][inputs["input_ids"].shape[-1]:],
|
| 66 |
+
skip_special_tokens=True
|
| 67 |
+
).strip()
|
| 68 |
+
|
| 69 |
return {"response": response_text}
|
| 70 |
|
| 71 |
# # Generate audio using TTS
|