|
|
import os |
|
|
import gradio as gr |
|
|
from openai import OpenAI |
|
|
|
|
|
# Optional heading and blurb for the chat page; both are left unset here.
title = description = None

# Deployment settings, each read from the environment variable of the same
# name. A variable that is not set is stored as None.
# NOTE(review): "MODE_DISPLAY_NAME" looks like a typo for
# "MODEL_DISPLAY_NAME"; the spelling is kept because the startup banner in
# this file reads the key under this exact name - confirm before renaming.
modelConfig = {
    env_key: os.environ.get(env_key)
    for env_key in (
        "MODEL_NAME",
        "MODE_DISPLAY_NAME",
        "MODEL_HF_URL",
        "VLLM_API_URL",
        "AUTH_TOKEN",
    )
}
|
|
|
|
|
|
|
|
# Shared API client used by the chat handler. Both the endpoint URL and the
# credential come from modelConfig; either may be None when the matching
# environment variable is not set.
_endpoint_url = modelConfig.get('VLLM_API_URL')
_auth_token = modelConfig.get('AUTH_TOKEN')
client = OpenAI(base_url=_endpoint_url, api_key=_auth_token)
|
|
|
|
|
|
|
|
def chat_fn(message, history):
    """Stream the assistant's reply to ``message`` for the chat UI.

    The earlier conversation plus the new user message are sent through the
    module-level ``client`` as a streaming chat completion (temperature 0.8)
    for the model named by ``modelConfig['MODEL_NAME']``.

    Args:
        message: Text of the latest user message.
        history: Earlier turns of the conversation. Each entry is either a
            dict with ``"role"`` and ``"content"`` keys (the interface in
            this file is created with ``type="messages"``) or a legacy
            ``(user, assistant)`` pair. May be empty or ``None``.

    Yields:
        ``{"role": "assistant", "content": text}`` once per received chunk,
        where ``text`` is the reply accumulated so far with a trailing
        ``<|end|>`` marker removed.
    """
    formatted = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Forward only role/content: history dicts may carry additional
            # keys (e.g. metadata) that are not part of a chat message.
            role, content = turn.get("role"), turn.get("content")
            if role and isinstance(content, str):
                formatted.append({"role": role, "content": content})
        else:
            # Legacy pair format: one entry holds both sides of a turn, and
            # either side may be missing.
            user, assistant = turn
            if user is not None:
                formatted.append({"role": "user", "content": user})
            if assistant is not None:
                formatted.append({"role": "assistant", "content": assistant})
    formatted.append({"role": "user", "content": message})

    stream = client.chat.completions.create(
        model=modelConfig.get('MODEL_NAME'),
        messages=formatted,
        temperature=0.8,
        stream=True,
    )

    end_marker = "<|end|>"
    output = ""
    for chunk in stream:
        # A chunk may arrive without any choices; there is nothing to append.
        if not chunk.choices:
            continue
        # The content attribute can exist but be None (e.g. a role-only
        # delta), so normalise to "" before concatenating.
        output += getattr(chunk.choices[0].delta, "content", None) or ""

        if output.endswith(end_marker):
            yield {"role": "assistant", "content": output[:-len(end_marker)]}
        else:
            yield {"role": "assistant", "content": output}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Startup banner: report which model this instance is configured to serve.
print(f"Running model {modelConfig.get('MODE_DISPLAY_NAME')} ({modelConfig.get('MODEL_NAME')})")

# Build the chat page around chat_fn, then start serving it.
_green_theme = gr.themes.Default(primary_hue="green")
demo = gr.ChatInterface(
    fn=chat_fn,
    title=title,
    description=description,
    theme=_green_theme,
    type="messages",
)
demo.launch()
|
|
|