# Source: agentic-api/examples/openai_sdk_example.py
# Author: MiniMax Agent
# Commit c126015: Add OpenAI API compatible endpoints for OpenELM models
"""
Example: Using OpenAI SDK with OpenELM API
This example demonstrates how to use the OpenAI SDK (or compatible client)
to call OpenELM models through our OpenAI API compatible wrapper.
Note: The official openai Python package requires the API server to have
proper authentication. For testing, use the included OpenAIClient helper.
Usage:
python examples/openai_sdk_example.py
"""
import sys
import os
# Add parent directory to path for imports
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from app import OpenAIClient
# Default model for all examples; override with OPENELM_MODEL to target a
# different deployment without editing the script.
DEFAULT_MODEL = os.environ.get("OPENELM_MODEL", "openelm-450m-instruct")


def _print_banner(text):
    """Print *text* framed by '=' rules, matching the script's header style."""
    print("=" * 60)
    print(text)
    print("=" * 60)


def _run_chat_example(client, title, messages, *, max_tokens, temperature,
                      show_meta=False):
    """Run one non-streaming chat-completion example and print its result.

    Args:
        client: OpenAIClient pointed at the OpenELM API.
        title: Heading printed above the example output.
        messages: Chat messages in OpenAI format ({"role", "content"} dicts).
        max_tokens: Generation length cap forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        show_meta: When True, also print the response id and model name
            (used by the first example to show the response envelope).

    Returns:
        The raw response dict from the API.
    """
    print(title)
    print("-" * 40)
    response = client.chat.completions.create(
        model=DEFAULT_MODEL,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
    )
    if show_meta:
        print(f"Response ID: {response['id']}")
        print(f"Model: {response['model']}")
    print(f"Content: {response['choices'][0]['message']['content']}")
    print(f"Usage: {response['usage']}")
    print()
    return response


def _run_streaming_example(client, title, messages, *, max_tokens, temperature):
    """Run one streaming example, printing content deltas as they arrive.

    Stops early and reports the error if the server emits an 'error' chunk.

    Returns:
        The number of non-empty content chunks received.
    """
    print(title)
    print("-" * 40)
    print("Streaming response:")
    stream = client.chat.completions.create(
        model=DEFAULT_MODEL,
        messages=messages,
        max_tokens=max_tokens,
        temperature=temperature,
        stream=True,
    )
    chunk_count = 0
    # With stream=True the client yields SSE-style dict chunks rather than a
    # single response object.
    for chunk in stream:
        if 'choices' in chunk and chunk['choices']:
            content = chunk['choices'][0].get('delta', {}).get('content')
            if content:
                print(content, end="", flush=True)
                chunk_count += 1
        elif 'error' in chunk:
            print(f"Error: {chunk['error']}")
            break
    print("\n")
    print(f"Received {chunk_count} chunks")
    print()
    return chunk_count


def main():
    """Exercise the OpenAI-compatible OpenELM API with five example calls.

    Reads the API base URL from OPENELM_API_URL (default
    http://localhost:8000) and walks through: a basic completion, a
    multi-turn conversation, a system message, deterministic generation,
    and a streaming response.
    """
    base_url = os.environ.get("OPENELM_API_URL", "http://localhost:8000")
    # The local wrapper does not validate credentials, so any key works.
    client = OpenAIClient(base_url=base_url, api_key="dummy-key")

    _print_banner("OpenELM OpenAI API - Usage Example")
    print(f"API URL: {base_url}")
    print()

    _run_chat_example(
        client,
        "Example 1: Basic Chat Completion",
        [{"role": "user", "content": "Say hello in a friendly way!"}],
        max_tokens=100,
        temperature=0.7,
        show_meta=True,
    )
    _run_chat_example(
        client,
        "Example 2: Multi-turn Conversation",
        [
            {"role": "user", "content": "What is artificial intelligence?"},
            {"role": "assistant", "content": "Artificial intelligence (AI) refers to systems that can perform tasks that typically require human intelligence."},
            {"role": "user", "content": "What are some examples?"},
        ],
        max_tokens=150,
        temperature=0.5,
    )
    _run_chat_example(
        client,
        "Example 3: Using System Message",
        [
            {"role": "system", "content": "You are a helpful coding assistant."},
            {"role": "user", "content": "What is a Python decorator?"},
        ],
        max_tokens=200,
        temperature=0.8,
    )
    _run_chat_example(
        client,
        "Example 4: Deterministic Generation (temperature=0)",
        [{"role": "user", "content": "What is 2 + 2?"}],
        max_tokens=50,
        temperature=0.0,  # temperature=0 requests deterministic output
    )
    _run_streaming_example(
        client,
        "Example 5: Streaming Response",
        [{"role": "user", "content": "Count to 5, one number per line."}],
        max_tokens=100,
        temperature=0.7,
    )

    _print_banner("All examples completed successfully!")
# Run the examples only when executed as a script, not when imported.
if __name__ == "__main__":
    main()