"""Minimal Gradio demo: Chinese text continuation with a small OLMo model."""
import gradio as gr
from transformers import pipeline

# Module-level pipeline so the model loads once at startup.
# Replace with your own model id if desired.
pipe = pipeline(
    "text-generation",
    model="complexly/olmo3-190m-zh-continue",
)


def predict(message):
    """Generate a sampled continuation of *message*.

    Args:
        message: The prompt text to continue.

    Returns:
        The generated text (the pipeline's default includes the prompt
        followed by up to 256 newly sampled tokens).
    """
    output = pipe(
        message,
        max_new_tokens=256,
        do_sample=True,          # enable sampling (default is greedy decoding)
        temperature=0.7,         # higher -> more random
        top_k=50,                # sample only from the k most probable tokens
        top_p=0.9,               # nucleus sampling: cumulative-probability cutoff
        repetition_penalty=1.2,  # penalize repeating tokens
    )
    return output[0]["generated_text"]


if __name__ == "__main__":
    # Guard the launch so the module can be imported without starting a server;
    # behavior is unchanged when the file is run as a script.
    gr.Interface(fn=predict, inputs="text", outputs="text").launch()