File size: 1,443 Bytes
ade4f6a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
from logging import getLogger
from pathlib import Path
from llama_cpp import Llama
from functools import lru_cache
logger = getLogger(__name__)
class QwenTranslator:
    """Chat-style translator backed by a local llama.cpp model.

    The model is loaded with the ``chatml`` chat format. Two system prompts
    are stored; which one is sent with a request depends on the request's
    source language (see :meth:`to_message`).

    Translations are memoized per instance. The cache is deliberately NOT a
    class-level ``functools.lru_cache`` on the method: that form keys on
    ``self`` and keeps every instance (and its loaded model) alive for as
    long as the class exists, and makes all instances share one small cache.
    """

    # Number of most-recently-used translations remembered per instance.
    _CACHE_SIZE = 10

    def __init__(self, model_path, system_prompt_en="", system_prompt_zh="") -> None:
        """Load the model and remember the system prompts.

        Args:
            model_path: Model file path, forwarded to ``Llama``.
            system_prompt_en: System prompt used when the source language
                is ``"en"``.
            system_prompt_zh: System prompt used for any other source
                language.
        """
        self.llm = Llama(
            model_path=model_path,
            chat_format="chatml",
            verbose=False,
        )
        self.sys_prompt_en = system_prompt_en
        self.sys_prompt_zh = system_prompt_zh
        # Maps (prompt, src_lang, dst_lang) -> translated text, oldest first.
        self._translation_cache = {}

    def to_message(self, prompt, src_lang, dst_lang):
        """Build the chat messages (system + user) for one request.

        Args:
            prompt: Text placed in the user message.
            src_lang: Source language code; ``"en"`` selects the English
                system prompt, anything else selects the Chinese one.
            dst_lang: Accepted for interface symmetry; not used here.

        Returns:
            A two-element list of ``{"role": ..., "content": ...}`` dicts.
        """
        if src_lang == "en":
            system_prompt = self.sys_prompt_en
        else:
            system_prompt = self.sys_prompt_zh
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": prompt},
        ]

    def translate(self, prompt, src_lang, dst_lang) -> str:
        """Return the model's reply for ``prompt``, using a small LRU cache.

        Args:
            prompt: Text to send as the user message.
            src_lang: Source language code (selects the system prompt).
            dst_lang: Destination language code (part of the cache key).

        Returns:
            The ``content`` of the first choice's message.

        Raises:
            TypeError: If any argument is unhashable (it is used as a
                cache key, as it was with ``lru_cache``).
        """
        key = (prompt, src_lang, dst_lang)
        # setdefault keeps this working for instances built without __init__.
        cache = self.__dict__.setdefault("_translation_cache", {})
        try:
            # Pop so that re-inserting below marks the entry as most recent.
            result = cache.pop(key)
        except KeyError:
            message = self.to_message(prompt, src_lang, dst_lang)
            # temperature=0 — presumably chosen for repeatable output, which
            # is what makes caching by input meaningful.
            output = self.llm.create_chat_completion(messages=message, temperature=0)
            result = output['choices'][0]['message']['content']
            if len(cache) >= self._CACHE_SIZE:
                # Dicts preserve insertion order, so the first key is the
                # least recently used one.
                del cache[next(iter(cache))]
        cache[key] = result
        return result

    def __call__(self, prompt, *args, **kwargs):
        """Forward a raw completion call straight to the underlying model."""
        return self.llm(prompt, *args, **kwargs)
if __name__ == "__main__":
    # Developer scratch entry point: resolves two local GGUF model files
    # to POSIX-style path strings. Nothing is loaded or run here.
    model_dir = Path("/Users/jeqin/work/code/Translator/moyoyo_asr_models")
    qwen2 = model_dir.joinpath("qwen2.5-1.5b-instruct-q5_0.gguf").as_posix()
    qwen3 = model_dir.joinpath("Qwen_Qwen3-0.6B-Q4_K_M.gguf").as_posix()
    # Example usage (left disabled in the original):
    # translator = QwenTranslator(qwen2, )