Spaces:

Povlakek-ai
/

Code-WebUI

Sleeping

App Files Files Community

Code-WebUI / app.py

Povlakek-ai

Update app.py

7b7e8c9 verified about 1 month ago

Raw

History Blame Contribute Delete

1.82 kB

	import os
	import gradio as gr
	from huggingface_hub import InferenceClient

	# 1. Identifier of the hosted model that answers the chat requests.
	MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"

	# 2. Access token read from the HF_TOKEN environment variable
	#    (None when the variable is not set).
	hf_token = os.getenv("HF_TOKEN")

	# 3. Hugging Face inference client bound to the model above; created once
	#    at import time and shared by every chat request.
	client = InferenceClient(token=hf_token, model=MODEL_ID)

	# 4. Chat logic: turn the Gradio history into chat-completion messages and
	#    stream the model's reply back to the UI.
	def _content_to_text(content):
	    """Return the plain text carried by a history message's ``content``.

	    A string is returned unchanged. A list/tuple of parts is flattened by
	    joining its string parts and the ``"text"`` value of its dict parts
	    with newlines; anything else (files, ``None``, ...) contributes nothing.
	    NOTE(review): the list-of-parts shape is assumed from Gradio's
	    structured message content -- confirm against the installed version.
	    """
	    if isinstance(content, str):
	        return content
	    if isinstance(content, (list, tuple)):
	        texts = []
	        for part in content:
	            if isinstance(part, str):
	                texts.append(part)
	            elif isinstance(part, dict) and isinstance(part.get("text"), str):
	                texts.append(part["text"])
	        return "\n".join(texts)
	    return ""


	def _history_to_messages(history):
	    """Convert a Gradio chat history into a list of role/content dicts.

	    Two history shapes are accepted:

	    * pairs -- ``[[user_msg, assistant_msg], ...]``; empty sides are skipped;
	    * message dicts -- ``[{"role": ..., "content": ...}, ...]``; entries with
	      an unknown role or no text are skipped.
	    """
	    messages = []
	    for entry in history or []:
	        if isinstance(entry, dict):
	            # Unpacking a dict as a pair would silently yield its *keys*,
	            # so message dicts need their own branch.
	            role = entry.get("role")
	            text = _content_to_text(entry.get("content"))
	            if role in ("user", "assistant", "system") and text:
	                messages.append({"role": role, "content": text})
	            continue

	        user_msg, assistant_msg = entry
	        if user_msg:
	            messages.append({"role": "user", "content": user_msg})
	        if assistant_msg:
	            messages.append({"role": "assistant", "content": assistant_msg})
	    return messages


	def chat_fn(message, history):
	    """Stream the assistant's reply to ``message`` given the prior ``history``.

	    Args:
	        message: The new user message.
	        history: Previous turns, either as ``[user, assistant]`` pairs or as
	            ``{"role", "content"}`` dicts (see ``_history_to_messages``).

	    Yields:
	        The reply accumulated so far, once per non-empty streamed token. If
	        the request fails, a single user-facing error string is yielded
	        instead of raising.
	    """
	    messages = _history_to_messages(history)
	    # The new user message always goes last.
	    messages.append({"role": "user", "content": message})

	    response = ""
	    try:
	        stream = client.chat_completion(
	            messages=messages,
	            max_tokens=1024,
	            stream=True,
	            temperature=0.7,
	            top_p=0.9,
	        )
	        for message_chunk in stream:
	            # Defensive: a chunk without choices (e.g. a usage-only chunk, if
	            # the backend sends one) carries no text and must not abort the
	            # reply with an IndexError.
	            if not message_chunk.choices:
	                continue
	            token = message_chunk.choices[0].delta.content
	            if token:
	                response += token
	                yield response
	    except Exception as e:
	        # UI boundary: report the failure in the chat window rather than
	        # letting the exception escape the generator.
	        yield f"⚠️ 出错了！可能是 API 暂时繁忙。错误信息: {str(e)}"

	# 5. Assemble the Gradio chat UI around chat_fn. The title shows only the
	#    part of MODEL_ID after the last "/". (type="messages" was dropped from
	#    this call because it raised an error.)
	demo = gr.ChatInterface(
	    title=f"🤖 我的专属大模型聊天室 ({MODEL_ID.rpartition('/')[2]})",
	    description="基于 Hugging Face 免费 Serverless API 驱动，不占用本地硬件资源，速度极快！",
	    examples=[
	        "你好，请自我介绍一下。",
	        "用 Python 写一个快速排序算法。",
	    ],
	    fn=chat_fn,
	)

	# Start the web server only when this file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()