# Snapshot of revision a80f6e6 (original file size: 1,823 bytes).
import os
from openai import OpenAI
def get_streaming_response(message="Who are you", enable_thinking=True):
    """Stream a chat completion from DashScope and return it as one string.

    Sends ``message`` as the user turn to the ``qwen3-8b`` model through
    DashScope's OpenAI-compatible endpoint, consumes the whole stream, and
    joins the reasoning fragments and the answer fragments.

    Args:
        message: Text of the user message.
        enable_thinking: Forwarded to the service as
            ``extra_body["enable_thinking"]``.

    Returns:
        The string ``"<think>{reasoning}</think>{answer}"``. Either part is
        empty when the stream carried no fragments of that kind.

    Raises:
        RuntimeError: If the ``DASHSCOPE_API_KEY`` environment variable is
            unset or empty.
    """
    # The credential comes from the environment so that no secret is
    # committed alongside the code.
    api_key = os.environ.get("DASHSCOPE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "Set the DASHSCOPE_API_KEY environment variable to your "
            "DashScope API key."
        )

    client = OpenAI(
        api_key=api_key,
        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    )
    completion = client.chat.completions.create(
        model="qwen3-8b",
        messages=[
            {'role': 'system', 'content': 'You are a helpful assistant.'},
            {'role': 'user', 'content': message},
        ],
        extra_body={
            "enable_thinking": enable_thinking,
        },
        temperature=0,
        top_p=0.9,
        stream=True,
        stream_options={"include_usage": True},
    )

    reasoning_parts = []
    answer_parts = []
    for chunk in completion:
        # Chunks without choices carry no delta; with include_usage this is
        # presumably the trailing usage-only chunk.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        # Not every delta object exposes reasoning_content, hence getattr.
        reasoning = getattr(delta, "reasoning_content", None)
        if reasoning:
            reasoning_parts.append(reasoning)
        answer = getattr(delta, "content", None)
        if answer:
            answer_parts.append(answer)

    reasoning_content = "".join(reasoning_parts)
    answer_content = "".join(answer_parts)
    return f"<think>{reasoning_content}</think>{answer_content}"
# Lazily created client for the local endpoint, shared across calls.
_LOCAL_CLIENT = None


def _get_local_client():
    """Return the shared local-endpoint client, creating it on first use."""
    global _LOCAL_CLIENT
    if _LOCAL_CLIENT is None:
        _LOCAL_CLIENT = OpenAI(
            api_key="EMPTY",
            base_url="http://127.0.0.1:8422/v1",
        )
    return _LOCAL_CLIENT


def get_response_template(
    message,
    model="meta-llama/Meta-Llama-3-8B-Instruct",
    client=None,
):
    """Send one user message to a chat model and return the reply text.

    Args:
        message: Text of the user message.
        model: Model identifier passed to the chat-completions call.
        client: Object exposing ``chat.completions.create``. When ``None``,
            a shared client pointing at ``http://127.0.0.1:8422/v1`` is
            created on first use rather than at import time.

    Returns:
        The ``content`` of the first choice's message.
    """
    if client is None:
        client = _get_local_client()

    chat_response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": message},
        ],
        temperature=0.8,
    )
    content = chat_response.choices[0].message.content
    print("Chat response:", content)
    return content