# hadadrjt
# LFM2.5-1.2B: 2026-01-14.
# e2e7b98
#
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
# SPDX-License-Identifier: Apache-2.0
#
import os
from config import MODEL, INFO, HOST
from openai import AsyncOpenAI
import gradio as gr
async def playground(
    message,
    history,
    num_ctx,
    max_tokens,
    temperature,
    repeat_penalty,
    top_k,
    top_p
):
    """Stream a chat completion for *message* against the configured model.

    The trailing parameters mirror the sidebar sliders: ``num_ctx``
    (context window), ``max_tokens``, ``temperature``, ``repeat_penalty``,
    ``top_k`` and ``top_p``. Yields the accumulated response text after
    every streamed delta so the UI can render progressively.
    """
    # Guard: ignore empty or non-string prompts instead of hitting the API.
    if not isinstance(message, str) or not message.strip():
        yield []
        return
    # Rebuild the conversation from history, keeping only well-formed turns.
    messages = [
        {"role": item["role"], "content": item["content"]}
        for item in history
        if isinstance(item, dict) and "role" in item and "content" in item
    ]
    messages.append({"role": "user", "content": message})
    # One client per request; closed in `finally` so the underlying HTTP
    # connection pool is not leaked on every call (the original never
    # closed the client it created inline).
    client = AsyncOpenAI(
        base_url=os.getenv("OLLAMA_API_BASE_URL"),
        api_key=os.getenv("OLLAMA_API_KEY")
    )
    try:
        stream = await client.chat.completions.create(
            model=MODEL,
            messages=messages,
            max_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            stream=True,
            # Ollama-specific sampling options travel in extra_body.
            extra_body={
                "num_ctx": int(num_ctx),
                "repeat_penalty": float(repeat_penalty),
                "top_k": int(top_k)
            }
        )
        response = ""
        async for chunk in stream:
            # Hoist the delta lookup; chunks may arrive with empty choices
            # or an empty delta, which are simply skipped.
            delta = chunk.choices[0].delta if chunk.choices else None
            if delta and delta.content:
                response += delta.content
                yield response
    finally:
        await client.close()
def _sidebar_slider(spacer=True, **slider_kwargs):
    """Build one parameter slider; optionally follow it with an empty
    Markdown element that acts as a vertical spacer in the sidebar."""
    control = gr.Slider(**slider_kwargs)
    if spacer:
        gr.Markdown("")
    return control


with gr.Blocks(
    fill_height=True,
    fill_width=False
) as app:
    # Sidebar: model info plus the six generation-parameter sliders.
    with gr.Sidebar():
        gr.HTML(INFO)
        gr.Markdown("---")
        gr.Markdown("## Model Parameters")
        num_ctx = _sidebar_slider(
            minimum=512,
            maximum=8192,
            value=512,
            step=128,
            label="Context Length",
            info="Maximum context window size (memory)"
        )
        max_tokens = _sidebar_slider(
            minimum=512,
            maximum=8192,
            value=512,
            step=128,
            label="Max Tokens",
            info="Maximum number of tokens to generate"
        )
        temperature = _sidebar_slider(
            minimum=0.1,
            maximum=1.0,
            value=0.1,
            step=0.1,
            label="Temperature",
            info="Controls randomness in generation"
        )
        repeat_penalty = _sidebar_slider(
            minimum=0.1,
            maximum=2.0,
            value=1.05,
            step=0.1,
            label="Repetition Penalty",
            info="Penalty for repeating tokens"
        )
        top_k = _sidebar_slider(
            minimum=0,
            maximum=100,
            value=50,
            step=1,
            label="Top K",
            info="Number of top tokens to consider"
        )
        # Last control carries no trailing spacer.
        top_p = _sidebar_slider(
            spacer=False,
            minimum=0.0,
            maximum=1.0,
            value=0.1,
            step=0.05,
            label="Top P",
            info="Cumulative probability threshold"
        )
    # Main surface: chat UI wired to the streaming generator; the sliders
    # are forwarded as extra positional arguments after (message, history).
    gr.ChatInterface(
        fn=playground,
        additional_inputs=[
            num_ctx,
            max_tokens,
            temperature,
            repeat_penalty,
            top_k,
            top_p
        ],
        type="messages",
        examples=[
            ["Please introduce yourself."],
            ["What caused World War II?"],
            ["Give me a short introduction to large language model."],
            ["Explain about quantum computers."]
        ],
        cache_examples=False,
        show_api=False
    )

app.launch(
    server_name=HOST,
    pwa=True
)