import time
from logging import getLogger

from llama_cpp import Llama

logger = getLogger(__name__)


class QwenTranslator:
    """Translate text with a local Qwen model served via llama-cpp-python."""

    def __init__(self, model_path, system_prompt="") -> None:
        self.llm = Llama(
            model_path=model_path,
            chat_format="chatml",  # Qwen chat models use the ChatML prompt format
            verbose=False,
        )
        self.sys_prompt = system_prompt

    def to_message(self, prompt, src_lang, dst_lang):
        """Build the ChatML message list for a translation request."""
        return [
            {"role": "system", "content": self.sys_prompt.format(src_lang=src_lang, dst_lang=dst_lang)},
            {"role": "user", "content": prompt},
        ]

    def translate(self, prompt, src_lang, dst_lang) -> str:
        """Run one chat-completion round and return the translated text."""
        message = self.to_message(prompt, src_lang, dst_lang)
        start_time = time.monotonic()
        output = self.llm.create_chat_completion(messages=message, temperature=0.9)
        logger.info(f"LLM translate cost: {time.monotonic() - start_time:.2f}s.")
        return output['choices'][0]['message']['content']

    def __call__(self, prompt, max_tokens=512, *args, **kwargs):
        # Raw completion passthrough: bypasses the chat template and sends
        # the prompt string directly to the underlying model.
        return self.llm(
            prompt,
            *args,
            max_tokens=max_tokens,
            **kwargs,
        )
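

# A minimal usage sketch. The model path and system-prompt template below
# are illustrative placeholders, not part of this module.
if __name__ == "__main__":
    translator = QwenTranslator(
        model_path="qwen2-7b-instruct-q4_k_m.gguf",  # hypothetical local GGUF file
        system_prompt="You are a professional translator. Translate the user's text from {src_lang} to {dst_lang}.",
    )
    print(translator.translate("你好，世界", src_lang="Chinese", dst_lang="English"))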