| | |
| | |
| | |
| | |
| |
|
| | import os |
| | from config import MODEL, INFO, HOST |
| | from openai import AsyncOpenAI |
| | import gradio as gr |
| |
|
async def playground(
    message,
    history,
    num_ctx,
    max_tokens,
    temperature,
    repeat_penalty,
    top_k,
    top_p
):
    """Stream a chat completion for the Gradio ChatInterface.

    Args:
        message: The user's new message (expected ``str``).
        history: Prior turns in Gradio "messages" format — a list of
            ``{"role": ..., "content": ...}`` dicts; malformed items are skipped.
        num_ctx: Context window size, forwarded to Ollama via ``extra_body``.
        max_tokens: Generation cap passed to the completions API.
        temperature: Sampling temperature.
        repeat_penalty: Ollama repetition penalty (``extra_body``).
        top_k: Ollama top-k sampling (``extra_body``).
        top_p: Nucleus sampling threshold.

    Yields:
        The accumulated response text after each streamed delta, so the UI
        re-renders the growing reply. Yields ``[]`` once and returns when the
        message is empty/invalid (clears the pending turn).
    """
    # Guard: Gradio can deliver empty or non-string messages (e.g. retries).
    if not isinstance(message, str) or not message.strip():
        yield []
        return

    # Keep only well-formed {"role", "content"} turns from the history,
    # then append the current user message.
    messages = [
        {"role": item["role"], "content": item["content"]}
        for item in history
        if isinstance(item, dict) and "role" in item and "content" in item
    ]
    messages.append({"role": "user", "content": message})

    # Build the client per call (env vars may change between requests) but
    # ALWAYS close it afterwards — the original leaked the underlying httpx
    # connection pool on every chat turn.
    client = AsyncOpenAI(
        base_url=os.getenv("OLLAMA_API_BASE_URL"),
        api_key=os.getenv("OLLAMA_API_KEY")
    )
    response = ""
    try:
        stream = await client.chat.completions.create(
            model=MODEL,
            messages=messages,
            max_tokens=int(max_tokens),
            temperature=float(temperature),
            top_p=float(top_p),
            stream=True,
            # Ollama-specific sampling options ride in extra_body; the
            # OpenAI SDK passes them through to the server untouched.
            extra_body={
                "num_ctx": int(num_ctx),
                "repeat_penalty": float(repeat_penalty),
                "top_k": int(top_k)
            }
        )
        async for chunk in stream:
            # Some chunks (e.g. the final usage frame) carry no delta content.
            if chunk.choices and chunk.choices[0].delta and chunk.choices[0].delta.content:
                response += chunk.choices[0].delta.content
                yield response
    finally:
        await client.close()
| |
|
# Top-level UI: a sidebar of sampling-parameter sliders plus a streaming
# ChatInterface wired to `playground`. NOTE: Gradio lays components out in
# construction order, so the statement order below is load-bearing.
with gr.Blocks(
    fill_height=True,
    fill_width=False
) as app:
    with gr.Sidebar():
        # Static info panel (HTML string from config) above the controls.
        gr.HTML(INFO)
        gr.Markdown("---")
        gr.Markdown("## Model Parameters")
        num_ctx = gr.Slider(
            minimum=512,
            maximum=8192,
            value=512,
            step=128,
            label="Context Length",
            info="Maximum context window size (memory)"
        )
        gr.Markdown("")  # empty Markdown used as vertical spacing
        max_tokens = gr.Slider(
            minimum=512,
            maximum=8192,
            value=512,
            step=128,
            label="Max Tokens",
            info="Maximum number of tokens to generate"
        )
        gr.Markdown("")
        temperature = gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.1,
            step=0.1,
            label="Temperature",
            info="Controls randomness in generation"
        )
        gr.Markdown("")
        repeat_penalty = gr.Slider(
            minimum=0.1,
            maximum=2.0,
            value=1.05,
            step=0.1,
            label="Repetition Penalty",
            info="Penalty for repeating tokens"
        )
        gr.Markdown("")
        top_k = gr.Slider(
            minimum=0,
            maximum=100,
            value=50,
            step=1,
            label="Top K",
            info="Number of top tokens to consider"
        )
        gr.Markdown("")
        top_p = gr.Slider(
            minimum=0.0,
            maximum=1.0,
            value=0.1,
            step=0.05,
            label="Top P",
            info="Cumulative probability threshold"
        )

    # Chat area: the sliders above are forwarded to `playground` as
    # additional inputs, in the same order as its parameters after
    # (message, history).
    gr.ChatInterface(
        fn=playground,
        additional_inputs=[
            num_ctx,
            max_tokens,
            temperature,
            repeat_penalty,
            top_k,
            top_p
        ],
        type="messages",  # history arrives as {"role", "content"} dicts
        examples=[
            ["Please introduce yourself."],
            ["What caused World War II?"],
            ["Give me a short introduction to large language model."],
            ["Explain about quantum computers."]
        ],
        cache_examples=False,
        show_api=False
    )

# Bind to HOST (from config); pwa=True serves the app as an installable
# progressive web app.
app.launch(
    server_name=HOST,
    pwa=True
)