# agent/data/models/llama3-1-70b-nvidia.py
# abenkbp's picture
# debug
# edddb3b
# raw
# history blame
# 820 Bytes
import argparse
from openai import OpenAI
import os
import json
import base64
# NVIDIA's OpenAI-compatible endpoint, the model requested from it, and the
# environment variable that holds the API key.
NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1"
MODEL_NAME = "meta/llama-3.1-70b-instruct"
API_KEY_ENV_VAR = "NVIDIA"


def decode_messages(encoded: str) -> list:
    """Decode the command-line payload into a chat ``messages`` list.

    Args:
        encoded: Base64 text whose decoded bytes are UTF-8 encoded JSON.

    Returns:
        The parsed JSON list, suitable for the ``messages`` argument of a
        chat completion request.

    Raises:
        ValueError: If the payload is not valid base64, UTF-8 or JSON, or if
            the decoded JSON value is not a list.
    """
    try:
        messages = json.loads(base64.b64decode(encoded).decode("utf-8"))
    except ValueError as err:
        # binascii.Error, UnicodeDecodeError and json.JSONDecodeError are all
        # ValueError subclasses, so one handler covers every decoding stage.
        raise ValueError("message must be base64-encoded UTF-8 JSON") from err
    if not isinstance(messages, list):
        raise ValueError("decoded message must be a JSON list of chat messages")
    return messages


def main(argv=None) -> None:
    """Send the encoded chat messages to the model and stream the reply.

    Args:
        argv: Command-line arguments to parse; ``None`` means ``sys.argv[1:]``.

    Raises:
        SystemExit: If the arguments or payload are invalid, or the API key
            environment variable is not set.
    """
    # Set up argument parsing
    parser = argparse.ArgumentParser(description="Pass message content to OpenAI API")
    parser.add_argument("message", type=str, help="The message content to send")
    args = parser.parse_args(argv)

    try:
        messages = decode_messages(args.message)
    except ValueError as err:
        # Report bad input as a usage error instead of a raw traceback.
        parser.error(str(err))

    # Fail fast on a missing key: when api_key is None the OpenAI client falls
    # back to OPENAI_API_KEY, which would send an unrelated credential to the
    # NVIDIA endpoint.
    api_key = os.getenv(API_KEY_ENV_VAR)
    if not api_key:
        raise SystemExit(f"environment variable {API_KEY_ENV_VAR} is not set")

    # Initialize the OpenAI client
    client = OpenAI(base_url=NVIDIA_BASE_URL, api_key=api_key)

    # Create the completion
    completion = client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        temperature=0.2,
        top_p=0.7,
        max_tokens=1024,
        stream=True,
    )

    # Print the response
    for chunk in completion:
        # A streamed chunk may carry no choices; indexing [0] would raise.
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        if text is not None:
            # Flush so a consumer reading a pipe sees tokens as they arrive.
            print(text, end="", flush=True)


if __name__ == "__main__":
    main()