import os

from openai import OpenAI

# Restrict the deployment to the first GPU.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'


def infer(client, model: str, messages):
    query = messages[0]['content']
    print(f'query: {query}')
    # Completions endpoint: the raw prompt is sent as-is (no chat template).
    resp = client.completions.create(model=model, prompt=query, max_tokens=64, temperature=0)
    response = resp.choices[0].text
    print(f'response: {response}')

    # Chat-completions endpoint: the same request expressed as a message list.
    resp = client.chat.completions.create(model=model, messages=messages, max_tokens=64, temperature=0)
    response = resp.choices[0].message.content
    print(f'response: {response}')
    return response


def run_client(host: str = '127.0.0.1', port: int = 8000):
    # The deployed server is OpenAI-compatible; the api_key is unused but required.
    client = OpenAI(
        api_key='EMPTY',
        base_url=f'http://{host}:{port}/v1',
    )
    model = client.models.list().data[0].id
    print(f'model: {model}')

    # Few-shot completion prompt: "Zhejiang -> Hangzhou, Anhui -> Hefei, Sichuan ->".
    messages = [{'role': 'user', 'content': '浙江 -> 杭州\n安徽 -> 合肥\n四川 ->'}]
    infer(client, model, messages)


if __name__ == '__main__':
    from swift.llm import run_deploy, DeployArguments

    # Deploy Qwen2.5-1.5B locally with the PyTorch backend; run_deploy yields the
    # server port and shuts the server down on exit. use_chat_template=False
    # because this is a base (non-chat) model.
    with run_deploy(
            DeployArguments(
                model='Qwen/Qwen2.5-1.5B', verbose=False, log_interval=-1, infer_backend='pt',
                use_chat_template=False)) as port:
        run_client(port=port)