"""Minimal Gradio demo: Chinese text continuation with a small OLMo model."""
import gradio as gr
from transformers import pipeline

# Module-level pipeline so the model loads once at startup.
# Replace with your own model id if desired.
pipe = pipeline(
    "text-generation",
    model="complexly/olmo3-190m-zh-continue",
)


def predict(message):
    """Generate a sampled continuation of *message*.

    Args:
        message: The prompt text to continue.

    Returns:
        The generated text (the pipeline's default includes the prompt
        followed by up to 256 newly sampled tokens).
    """
    output = pipe(
        message,
        max_new_tokens=256,
        do_sample=True,          # enable sampling (default is greedy decoding)
        temperature=0.7,         # higher -> more random
        top_k=50,                # sample only from the k most probable tokens
        top_p=0.9,               # nucleus sampling: cumulative-probability cutoff
        repetition_penalty=1.2,  # penalize repeating tokens
    )
    return output[0]["generated_text"]


if __name__ == "__main__":
    # Guard the launch so the module can be imported without starting a server;
    # behavior is unchanged when the file is run as a script.
    gr.Interface(fn=predict, inputs="text", outputs="text").launch()