CrazyMonkey0 commited on
Commit
df63d34
·
1 Parent(s): 0bdf4f1

feat(nlp): switch NLP model to Qwen2.5-0.5B-Instruct

Browse files
Files changed (3) hide show
  1. README.md +6 -4
  2. app/main.py +1 -1
  3. app/routes/nlp.py +42 -25
README.md CHANGED
@@ -9,7 +9,7 @@ app_file: "app/main.py"
9
  app_port: 7860
10
  short_description: "English learning API"
11
  models:
12
- - Qwen/Qwen2.5-1.5B-Instruct-GGUF
13
  - openai/whisper-small.en
14
  - facebook/mms-tts-eng
15
  - allegro/BiDi-eng-pol
@@ -53,7 +53,7 @@ Each model retains its original license as listed below:
53
  Developed by [**AI at Meta**](https://ai.facebook.com/).
54
 
55
  ### 💬 Natural Language Processing (Chat & Grammar)
56
- - [**Qwen2.5-1.5B-Instruct-GGUF**](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF)
57
  Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
58
  Developed by [**Qwen Team**](https://qwen.ai/).
59
 
@@ -93,7 +93,7 @@ The source code of this application is distributed separately under the license
93
  year={2023}
94
  }
95
 
96
- ### 3. Qwen2.5-1.5B-Instruct-GGUF — Qwen Team
97
  @misc{qwen2.5,
98
  title = {Qwen2.5: A Party of Foundation Models},
99
  url = {https://qwenlm.github.io/blog/qwen2.5/},
@@ -101,6 +101,7 @@ The source code of this application is distributed separately under the license
101
  month = {September},
102
  year = {2024}
103
  }
 
104
  @article{qwen2,
105
  title={Qwen2 Technical Report},
106
  author={An Yang and Baosong Yang and Binyuan Hui and Bo Zheng and Bowen Yu and Chang Zhou and Chengpeng Li and Chengyuan Li and Dayiheng Liu and Fei Huang and Guanting Dong and Haoran Wei and Huan Lin and Jialong Tang and Jialin Wang and Jian Yang and Jianhong Tu and Jianwei Zhang and Jianxin Ma and Jin Xu and Jingren Zhou and Jinze Bai and Jinzheng He and Junyang Lin and Kai Dang and Keming Lu and Keqin Chen and Kexin Yang and Mei Li and Mingfeng Xue and Na Ni and Pei Zhang and Peng Wang and Ru Peng and Rui Men and Ruize Gao and Runji Lin and Shijie Wang and Shuai Bai and Sinan Tan and Tianhang Zhu and Tianhao Li and Tianyu Liu and Wenbin Ge and Xiaodong Deng and Xiaohuan Zhou and Xingzhang Ren and Xinyu Zhang and Xipin Wei and Xuancheng Ren and Yang Fan and Yang Yao and Yichang Zhang and Yu Wan and Yunfei Chu and Yuqiong Liu and Zeyu Cui and Zhenru Zhang and Zhihao Fan},
@@ -109,6 +110,7 @@ The source code of this application is distributed separately under the license
109
  }
110
 
111
 
 
112
  ### 4. Allegro/BiDi-eng-pol — Allegro ML Research
113
  Authors:
114
  - MLR @ Allegro: [Artur Kot](https://linkedin.com/in/arturkot), [Mikołaj Koszowski](https://linkedin.com/in/mkoszowski), [Wojciech Chojnowski](https://linkedin.com/in/wojciech-chojnowski-744702348), [Mieszko Rutkowski](https://linkedin.com/in/mieszko-rutkowski)
@@ -122,7 +124,7 @@ Special thanks to the teams and organizations that created and maintain the foll
122
 
123
  - **[OpenAI](https://openai.com/)** for [**Whisper Small (English)**](https://huggingface.co/openai/whisper-small.en) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
124
  - **[Facebook AI Research (FAIR)](https://ai.facebook.com/)** for [**facebook/mms-tts-eng**](https://huggingface.co/facebook/mms-tts-eng) — Licensed under [Creative Commons Attribution Non Commercial 4.0 (CC BY-NC 4.0)](https://creativecommons.org/licenses/by-nc/4.0/).
125
- - **[Qwen Team](https://qwenlm.github.io/blog/qwen2.5/)** for [**Qwen2.5-1.5B-Instruct-GGUF**](https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct-GGUF) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
126
  - **[Allegro ML Research](https://ml.allegro.tech/)** for [**BiDi-eng-pol**](https://huggingface.co/allegro/BiDi-eng-pol) — Licensed under [Creative Commons Attribution 4.0 International (CC BY 4.0)](https://creativecommons.org/licenses/by/4.0/).
127
 
128
  This application uses these models for educational and research purposes only, in full compliance with their respective licenses.
 
9
  app_port: 7860
10
  short_description: "English learning API"
11
  models:
12
+ - Qwen/Qwen2.5-0.5B-Instruct
13
  - openai/whisper-small.en
14
  - facebook/mms-tts-eng
15
  - allegro/BiDi-eng-pol
 
53
  Developed by [**AI at Meta**](https://ai.facebook.com/).
54
 
55
  ### 💬 Natural Language Processing (Chat & Grammar)
56
+ - [**Qwen/Qwen2.5-0.5B-Instruct**](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct)
57
  Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
58
  Developed by [**Qwen Team**](https://qwen.ai/).
59
 
 
93
  year={2023}
94
  }
95
 
96
+ ### 3. Qwen/Qwen2.5-0.5B-Instruct — Qwen Team
97
  @misc{qwen2.5,
98
  title = {Qwen2.5: A Party of Foundation Models},
99
  url = {https://qwenlm.github.io/blog/qwen2.5/},
 
101
  month = {September},
102
  year = {2024}
103
  }
104
+
105
  @article{qwen2,
106
  title={Qwen2 Technical Report},
107
  author={An Yang and Baosong Yang and Binyuan Hui and Bo Zheng and Bowen Yu and Chang Zhou and Chengpeng Li and Chengyuan Li and Dayiheng Liu and Fei Huang and Guanting Dong and Haoran Wei and Huan Lin and Jialong Tang and Jialin Wang and Jian Yang and Jianhong Tu and Jianwei Zhang and Jianxin Ma and Jin Xu and Jingren Zhou and Jinze Bai and Jinzheng He and Junyang Lin and Kai Dang and Keming Lu and Keqin Chen and Kexin Yang and Mei Li and Mingfeng Xue and Na Ni and Pei Zhang and Peng Wang and Ru Peng and Rui Men and Ruize Gao and Runji Lin and Shijie Wang and Shuai Bai and Sinan Tan and Tianhang Zhu and Tianhao Li and Tianyu Liu and Wenbin Ge and Xiaodong Deng and Xiaohuan Zhou and Xingzhang Ren and Xinyu Zhang and Xipin Wei and Xuancheng Ren and Yang Fan and Yang Yao and Yichang Zhang and Yu Wan and Yunfei Chu and Yuqiong Liu and Zeyu Cui and Zhenru Zhang and Zhihao Fan},
 
110
  }
111
 
112
 
113
+
114
  ### 4. Allegro/BiDi-eng-pol — Allegro ML Research
115
  Authors:
116
  - MLR @ Allegro: [Artur Kot](https://linkedin.com/in/arturkot), [Mikołaj Koszowski](https://linkedin.com/in/mkoszowski), [Wojciech Chojnowski](https://linkedin.com/in/wojciech-chojnowski-744702348), [Mieszko Rutkowski](https://linkedin.com/in/mieszko-rutkowski)
 
124
 
125
  - **[OpenAI](https://openai.com/)** for [**Whisper Small (English)**](https://huggingface.co/openai/whisper-small.en) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
126
  - **[Facebook AI Research (FAIR)](https://ai.facebook.com/)** for [**facebook/mms-tts-eng**](https://huggingface.co/facebook/mms-tts-eng) — Licensed under [Creative Commons Attribution Non Commercial 4.0 (CC BY-NC 4.0)](https://creativecommons.org/licenses/by-nc/4.0/).
127
+ - **[Qwen Team](https://qwen.ai/)** for [**Qwen/Qwen2.5-0.5B-Instruct**](https://huggingface.co/Qwen/Qwen2.5-0.5B-Instruct) — Licensed under [Apache License 2.0](http://www.apache.org/licenses/LICENSE-2.0).
128
  - **[Allegro ML Research](https://ml.allegro.tech/)** for [**BiDi-eng-pol**](https://huggingface.co/allegro/BiDi-eng-pol) — Licensed under [Creative Commons Attribution 4.0 International (CC BY 4.0)](https://creativecommons.org/licenses/by/4.0/).
129
 
130
  This application uses these models for educational and research purposes only, in full compliance with their respective licenses.
app/main.py CHANGED
@@ -12,7 +12,7 @@ app = FastAPI(debug=False)
12
  async def startup_event():
13
  print("[INFO] Loading all models...")
14
  try:
15
- app.state.model_nlp = load_model_nlp()
16
  app.state.model_trans, app.state.tokenizer_trans = load_model_translation()
17
  app.state.model_tts, app.state.tokenizer_tts = load_model_tts()
18
  app.state.processor_asr, app.state.model_asr = load_model_asr()
 
12
  async def startup_event():
13
  print("[INFO] Loading all models...")
14
  try:
15
+ app.state.model_nlp, app.state.tokenizer_nlp = load_model_nlp()
16
  app.state.model_trans, app.state.tokenizer_trans = load_model_translation()
17
  app.state.model_tts, app.state.tokenizer_tts = load_model_tts()
18
  app.state.processor_asr, app.state.model_asr = load_model_asr()
app/routes/nlp.py CHANGED
@@ -1,6 +1,7 @@
1
  from fastapi import APIRouter, Request, Response
 
 
2
  from pydantic import BaseModel
3
- from llama_cpp import Llama
4
  from .tts import save_audio
5
  import uuid
6
  import os
@@ -19,36 +20,52 @@ class ChatRequest(BaseModel):
19
  # Load NLP model
20
  def load_model_nlp():
21
 
22
- llm = Llama.from_pretrained(
23
- repo_id="Qwen/Qwen2.5-1.5B-Instruct-GGUF",
24
- filename="qwen2.5-1.5b-instruct-q3_k_m.gguf",
25
- use_mmap=True,
26
- verbose=True,
 
 
27
  )
28
- return llm
 
 
29
 
30
  @router.post("/chat")
31
  async def chat(request: Request, chat_request: ChatRequest):
32
- """Endpoint for chat with the NLP model."""
33
  text = chat_request.message
34
-
35
- # Fetch the loaded NLP model from app state
36
- llm = request.app.state.model_nlp
37
-
38
- # Create the prompt
39
- prompt = f"{SYSTEM_PROMPT}\n\nUser: {text}\nEmma:"
40
-
41
- # Generate response from the model
42
- output = llm(
43
- prompt,
44
- max_tokens=128,
45
- temperature=0.7,
46
- top_p=0.9,
47
- top_k=50,
48
- stop=["\nUser:", "\nEmma:"]
49
- )
50
 
51
- response_text = output["choices"][0]["text"].strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  return {"response": response_text}
53
 
54
  # # Generate audio using TTS
 
1
  from fastapi import APIRouter, Request, Response
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
  from pydantic import BaseModel
 
5
  from .tts import save_audio
6
  import uuid
7
  import os
 
20
  # Load NLP model
21
  def load_model_nlp():
22
 
23
+ model_id = "Qwen/Qwen2.5-0.5B-Instruct"
24
+
25
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
26
+ model = AutoModelForCausalLM.from_pretrained(
27
+ model_id,
28
+ torch_dtype=torch.float32, # CPU friendly
29
+ device_map="cpu"
30
  )
31
+
32
+ model.eval()
33
+ return model, tokenizer
34
 
35
  @router.post("/chat")
36
  async def chat(request: Request, chat_request: ChatRequest):
 
37
  text = chat_request.message
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
+ model = request.app.state.model_nlp
40
+ tokenizer = request.app.state.tokenizer_nlp
41
+
42
+ messages = [
43
+ {"role": "system", "content": SYSTEM_PROMPT},
44
+ {"role": "user", "content": text},
45
+ ]
46
+
47
+ inputs = tokenizer.apply_chat_template(
48
+ messages,
49
+ add_generation_prompt=True,
50
+ tokenize=True,
51
+ return_dict=True,
52
+ return_tensors="pt",
53
+ ).to(model.device)
54
+
55
+ with torch.no_grad():
56
+ output = model.generate(
57
+ **inputs,
58
+ max_new_tokens=150,
59
+ temperature=0.7,
60
+ top_p=0.9,
61
+ do_sample=True,
62
+ )
63
+
64
+ response_text = tokenizer.decode(
65
+ output[0][inputs["input_ids"].shape[-1]:],
66
+ skip_special_tokens=True
67
+ ).strip()
68
+
69
  return {"response": response_text}
70
 
71
  # # Generate audio using TTS