# SPDX-FileCopyrightText: Hadad
# SPDX-License-Identifier: Apache-2.0

import os

import gradio as gr
from openai import AsyncOpenAI

from config import HOST, INFO, MODEL


async def playground(
    message,
    history,
    num_ctx,
    max_tokens,
    temperature,
    repeat_penalty,
    top_k,
    top_p,
):
    """Stream a chat completion for *message* against the configured model.

    Async generator used as the ``fn`` of ``gr.ChatInterface`` (messages
    format): it rebuilds the conversation from ``history``, forwards the
    sampling parameters to the backend, and yields the accumulated response
    text after each streamed token.

    Args:
        message: The user's latest input text.
        history: Prior turns as a list of ``{"role": ..., "content": ...}``
            dicts (Gradio "messages" history format).
        num_ctx: Context window size, passed via ``extra_body`` (Ollama).
        max_tokens: Maximum number of tokens to generate.
        temperature: Sampling temperature.
        repeat_penalty: Repetition penalty, passed via ``extra_body``.
        top_k: Top-k sampling cutoff, passed via ``extra_body``.
        top_p: Nucleus-sampling cumulative-probability threshold.

    Yields:
        The response text accumulated so far (grows with each chunk), or an
        empty list for blank/non-string input.
    """
    # Guard: blank or non-string input produces no completion request.
    if not isinstance(message, str) or not message.strip():
        yield []
        return

    # Rebuild the conversation, keeping only well-formed history entries.
    messages = [
        {"role": item["role"], "content": item["content"]}
        for item in history
        if isinstance(item, dict) and "role" in item and "content" in item
    ]
    messages.append({"role": "user", "content": message})

    # Credentials/endpoint are read per request so env changes take effect
    # without a restart. The client is explicitly closed afterwards to avoid
    # leaking the underlying HTTP connection pool (the original never closed
    # it, creating a fresh unclosed client on every call).
    client = AsyncOpenAI(
        base_url=os.getenv("OLLAMA_API_BASE_URL"),
        api_key=os.getenv("OLLAMA_API_KEY"),
    )
    try:
        stream = await client.chat.completions.create(
            model=MODEL,
            messages=messages,
            max_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            stream=True,
            # Ollama-specific options travel in extra_body; the OpenAI SDK
            # passes them through to the backend untouched.
            extra_body={
                "num_ctx": int(num_ctx),
                "repeat_penalty": float(repeat_penalty),
                "top_k": int(top_k),
            },
        )
        response = ""
        async for chunk in stream:
            # Hoist the repeated attribute chain; skip keep-alive/empty deltas.
            delta = chunk.choices[0].delta if chunk.choices else None
            if delta and delta.content:
                response += delta.content
                yield response
    finally:
        await client.close()


with gr.Blocks(fill_height=True, fill_width=False) as app:
    # Sidebar: static info plus the sampling-parameter sliders that feed
    # playground() via ChatInterface's additional_inputs.
    with gr.Sidebar():
        gr.HTML(INFO)
        gr.Markdown("---")
        gr.Markdown("## Model Parameters")
        num_ctx = gr.Slider(
            minimum=512,
            maximum=8192,
            value=512,
            step=128,
            label="Context Length",
            info="Maximum context window size (memory)",
        )
        gr.Markdown("")
        max_tokens = gr.Slider(
            minimum=512,
            maximum=8192,
            value=512,
            step=128,
            label="Max Tokens",
            info="Maximum number of tokens to generate",
        )
        gr.Markdown("")
        temperature = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.1,
            step=0.1,
            label="Temperature",
            info="Controls randomness in generation",
        )
        gr.Markdown("")
        repeat_penalty = gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=1.05,
            step=0.1,
            label="Repetition Penalty",
            info="Penalty for repeating tokens",
        )
        gr.Markdown("")
        top_k = gr.Slider(
            minimum=0,
            maximum=100,
            value=50,
            step=1,
            label="Top K",
            info="Number of top tokens to consider",
        )
        gr.Markdown("")
        top_p = gr.Slider(
            minimum=0.0,
            maximum=1.0,
            value=0.1,
            step=0.05,
            label="Top P",
            info="Cumulative probability threshold",
        )

    gr.ChatInterface(
        fn=playground,
        additional_inputs=[
            num_ctx,
            max_tokens,
            temperature,
            repeat_penalty,
            top_k,
            top_p,
        ],
        type="messages",
        examples=[
            ["Please introduce yourself."],
            ["What caused World War II?"],
            ["Give me a short introduction to large language model."],
            ["Explain about quantum computers."],
        ],
        cache_examples=False,
        show_api=False,
    )

app.launch(
    server_name=HOST,
    pwa=True,
)