# launch server
# python -m sglang.launch_server --model LxzGordon/URM-LLaMa-3.1-8B --is-embedding

import requests

# Base URL of the server started with the command above.
url = "http://127.0.0.1:30000"

# Seconds to wait for the server before giving up; without an explicit
# timeout, requests blocks indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 60

PROMPT = (
    "What is the range of the numeric output of a sigmoid node in a neural network?"
)

# Two candidate answers to the same prompt.
RESPONSE1 = "The output of a sigmoid node is bounded between -1 and 1."
RESPONSE2 = "The output of a sigmoid node is bounded between 0 and 1."

# One conversation (user turn followed by assistant turn) per candidate answer.
json_data = {
    "conv": [
        [
            {"role": "user", "content": PROMPT},
            {"role": "assistant", "content": RESPONSE1},
        ],
        [
            {"role": "user", "content": PROMPT},
            {"role": "assistant", "content": RESPONSE2},
        ],
    ],
}

http_response = requests.post(
    url + "/classify",
    json=json_data,
    timeout=REQUEST_TIMEOUT,
)
# Surface 4xx/5xx replies as a clear HTTPError here, rather than as a
# confusing KeyError/TypeError when indexing the error payload below.
http_response.raise_for_status()
response = http_response.json()

print(response)
# Each item of the reply is read through its "embedding" key.
print("scores:", [x["embedding"] for x in response])