gpt-oss-120b-chat

Running

App Files Files Community

gpt-oss-120b-chat / app.py

bradnow

Update chat to use secrets

31bd9a2 verified 8 months ago

raw

history blame

1.92 kB

	import os
	import gradio as gr
	from openai import OpenAI

	# Header text for the chat page. Both are switched off for now; the title
	# previously considered was "ServiceNow-AI Chat".
	title = None
	description = None

	# Deployment settings, each read from the environment variable of the same
	# name. A variable that is not set yields None.
	modelConfig = {
	    setting: os.environ.get(setting)
	    for setting in (
	        "MODEL_NAME",
	        "MODE_DISPLAY_NAME",
	        "MODEL_HF_URL",
	        "VLLM_API_URL",
	        "AUTH_TOKEN",
	    )
	}

	# OpenAI-compatible client pointed at the configured vLLM endpoint and
	# authenticated with the configured token.
	client = OpenAI(
	    base_url=modelConfig["VLLM_API_URL"],
	    api_key=modelConfig["AUTH_TOKEN"],
	)


	# Marker the model may append to a finished reply; it is hidden from the user.
	_END_TOKEN = "<|end|>"


	def _history_to_messages(history):
	    """Convert Gradio chat history into OpenAI-style message dicts.

	    Accepts both history layouts: a list of ``{"role", "content"}`` dicts
	    (used when the interface is created with ``type="messages"``) and a
	    list of ``(user, assistant)`` pairs.
	    """
	    messages = []
	    for entry in history or []:
	        if isinstance(entry, dict):
	            role = entry.get("role")
	            content = entry.get("content")
	            # Forward only the two fields the API expects; any other keys
	            # on the history entry are dropped.
	            if role and content is not None:
	                messages.append({"role": role, "content": content})
	        else:
	            user_text, assistant_text = entry
	            if user_text is not None:
	                messages.append({"role": "user", "content": user_text})
	            if assistant_text is not None:
	                messages.append({"role": "assistant", "content": assistant_text})
	    return messages


	def _strip_end_token(text):
	    """Return *text* without a trailing end-of-reply marker, if present."""
	    if text.endswith(_END_TOKEN):
	        return text[:-len(_END_TOKEN)]
	    return text


	def chat_fn(message, history):
	    """Stream the assistant's reply to *message*.

	    Args:
	        message: The text the user just submitted.
	        history: Earlier turns of the conversation, either as
	            ``{"role", "content"}`` dicts or as ``(user, assistant)`` pairs.

	    Yields:
	        A ``{"role": "assistant", "content": ...}`` dict holding the reply
	        accumulated so far, once for every chunk that carries new text.
	    """
	    formatted = _history_to_messages(history)
	    formatted.append({"role": "user", "content": message})

	    # Create the streaming response
	    stream = client.chat.completions.create(
	        model=modelConfig.get('MODEL_NAME'),
	        messages=formatted,
	        temperature=0.8,
	        stream=True,
	    )

	    output = ""
	    for chunk in stream:
	        # Some chunks carry no choices at all; there is nothing to show.
	        if not chunk.choices:
	            continue
	        # The delta's content can be missing or None (for example on a
	        # chunk that only announces the role), so never concatenate blindly.
	        piece = getattr(chunk.choices[0].delta, "content", None)
	        if not piece:
	            continue
	        output += piece
	        yield {"role": "assistant", "content": _strip_end_token(output)}


	# Optional description linking to the model page (currently disabled):
	# description = f"### Model: [{MODE_DISPLAY_NAME}]({MODEL_HF_URL})"

	# Log which model this process is configured for before the UI starts.
	print(f"Running model {modelConfig.get('MODE_DISPLAY_NAME')} ({modelConfig.get('MODEL_NAME')})")

	# Build the chat interface first, then start serving it.
	chat_ui = gr.ChatInterface(
	    fn=chat_fn,
	    type="messages",
	    title=title,
	    description=description,
	    theme=gr.themes.Default(primary_hue="green"),
	)
	chat_ui.launch()