# Hugging Face Spaces chat app (page-status banner removed from scraped copy)
| import os | |
| from openai import AsyncOpenAI | |
| import gradio as gr | |
| default_model = "llama3:8b-instruct-q4_K_M" | |
| models = ["llama3:8b-instruct-q4_K_M", "codestral:22b-v0.1-q4_K_M"] | |
| description = "Learn more at https://replicantzk.com." | |
| base_url = os.getenv("OPENAI_BASE_URL") or "https://platform.replicantzk.com" | |
| api_key = os.getenv("OPENAI_API_KEY") | |
async def predict(message, history, model, temperature, stream, base_url, api_key):
    """Yield the assistant's reply for *message*, given the chat *history*.

    Builds an OpenAI-format message list from Gradio's (user, assistant)
    tuple history, then calls the chat-completions endpoint. When *stream*
    is true, yields the growing partial reply chunk by chunk; otherwise
    yields the complete reply once. Any failure (connection, auth, bad
    model, mid-stream error) is surfaced to the UI as a gr.Error.
    """
    # A fresh client per call: base_url/api_key come from UI inputs and
    # may change between submissions.
    client = AsyncOpenAI(base_url=base_url, api_key=api_key)

    # Rebuild the conversation in OpenAI message format.
    messages = []
    for user_turn, assistant_turn in history:
        messages += [
            {"role": "user", "content": user_turn},
            {"role": "assistant", "content": assistant_turn},
        ]
    messages.append({"role": "user", "content": message})

    try:
        completion = await client.chat.completions.create(
            model=model,
            messages=messages,
            temperature=temperature,
            stream=stream,
        )
        if not stream:
            # Non-streaming: the full reply is already in the response.
            yield completion.choices[0].message.content
        else:
            accumulated = ""
            async for part in completion:
                delta = part.choices[0].delta.content
                if delta is not None:
                    accumulated += delta
                    yield accumulated
    except Exception as exc:
        # Convert any failure into the error type Gradio displays.
        raise gr.Error(str(exc))
| model = gr.Dropdown(label="Model", choices=models, value=default_model) | |
| temperature = gr.Slider(0, 1, value=0, label="Temperature") | |
| stream = gr.Checkbox(value=True, label="Stream") | |
| base_url = gr.Textbox(label="OpenAI-compatible base URL", value=base_url) | |
| api_key = gr.Textbox(label="OpenAI-compatible API key", type="password", value=api_key) | |
| demo = gr.ChatInterface( | |
| fn=predict, | |
| additional_inputs=[model, temperature, stream, base_url, api_key], | |
| description=description, | |
| ) | |
| if __name__ == "__main__": | |
| demo.launch() | |