File size: 2,245 Bytes
"""
OpenAI-compatible LoRA adapter usage with SGLang.

Server setup:
    python -m sglang.launch_server \\
        --model meta-llama/Llama-3.1-8B-Instruct \\
        --enable-lora \\
        --lora-paths sql=/path/to/sql python=/path/to/python
"""
import openai
# Shared OpenAI SDK client used by every example below; it targets a server on
# localhost port 30000 under the "/v1" route prefix.
# NOTE(review): api_key is the placeholder string "EMPTY" -- presumably the
# local server does not validate it; confirm if the server is started with auth.
client = openai.Client(base_url="http://127.0.0.1:30000/v1", api_key="EMPTY")
def main():
    """Send four sample requests through ``client`` and print each reply.

    The requests, in order:

    1. Chat completion whose model name ends in ``:sql`` (the adapter is
       named inside the ``model`` parameter).
    2. Text completion whose model name ends in ``:python``.
    3. Chat completion on the plain model name, with the adapter passed as
       ``lora_path`` via ``extra_body``.
    4. Chat completion on the plain model name with no adapter at all.

    Nothing is returned. Exceptions raised by a request are not handled here
    and propagate to the caller.
    """
    base_model = "meta-llama/Llama-3.1-8B-Instruct"

    def user_turn(text):
        # Build a fresh single-message conversation for each request.
        return [{"role": "user", "content": text}]

    def show(text):
        # Common output format shared by all four examples.
        print(f" Response: {text}\n")

    print("SGLang OpenAI-Compatible LoRA Examples\n")

    # Example 1 (new style): "<base-model>:<adapter-name>" in the model field.
    print("1. Chat with LoRA adapter in model parameter:")
    sql_chat = client.chat.completions.create(
        model="meta-llama/Llama-3.1-8B-Instruct:sql",
        messages=user_turn("Convert to SQL: show all users"),
        max_tokens=50,
    )
    show(sql_chat.choices[0].message.content)

    # Example 2: the same model-suffix form on the completions endpoint.
    print("2. Completion with LoRA adapter:")
    python_completion = client.completions.create(
        model="meta-llama/Llama-3.1-8B-Instruct:python",
        prompt="def fibonacci(n):",
        max_tokens=50,
    )
    show(python_completion.choices[0].text)

    # Example 3 (old style): plain model name, adapter given as lora_path.
    print("3. Backward compatible (explicit lora_path):")
    legacy_chat = client.chat.completions.create(
        model=base_model,
        messages=user_turn("Convert to SQL: show all users"),
        extra_body={"lora_path": "sql"},
        max_tokens=50,
    )
    show(legacy_chat.choices[0].message.content)

    # Example 4: plain model name, no adapter requested.
    print("4. Base model without adapter:")
    base_chat = client.chat.completions.create(
        model=base_model,
        messages=user_turn("Hello!"),
        max_tokens=30,
    )
    show(base_chat.choices[0].message.content)

    print("All examples completed!")
# Script entry point: run the examples and, on any failure, print the error
# plus a reminder of how to start the server.
if __name__ == "__main__":
    try:
        main()
    except Exception as e:
        # Top-level boundary: report the failure in a readable form rather
        # than dumping a traceback at someone trying out the example.
        print(f"Error: {e}")
        print(
            "\nEnsure server is running:\n"
            " python -m sglang.launch_server --model ... --enable-lora --lora-paths ..."
        )
        # Exit non-zero so shells and CI can tell the examples did not run;
        # previously the script reported success (status 0) after a failure.
        raise SystemExit(1)
|