|
|
import gradio as gr |
|
|
import requests |
|
|
import os |
|
|
import json |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Hugging Face access token, injected through the Space's "Secrets" settings.
# May be None when running locally without the secret configured.
HF_TOKEN = os.environ.get("HF_TOKEN")

# Serverless Inference API endpoint for the fine-tuned Qwen3 0.6B model.
API_URL = (
    "https://api-inference.huggingface.co/models/"
    "badanwang/teacher_basic_qwen3-0.6b"
)
|
|
|
|
|
|
|
|
def _history_to_messages(history):
    """Convert Gradio chat history into an OpenAI-style message list.

    Accepts both history formats Gradio may pass:
    - tuple format: ``[[user_msg, assistant_msg], ...]``
    - messages format: ``[{"role": ..., "content": ...}, ...]``
      (default for ``ChatInterface(type="messages")`` / Gradio 5)

    Turns whose content is ``None`` (e.g. an assistant reply that was never
    produced) are skipped so invalid ``content: null`` entries are not sent
    to the API.

    :param history: conversation history from Gradio.
    :return: list of ``{"role", "content"}`` dicts.
    """
    messages = []
    for turn in history:
        if isinstance(turn, dict):
            # Already a role/content message; copy the relevant fields only.
            if turn.get("content") is not None:
                messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            user_msg, assistant_msg = turn
            if user_msg is not None:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg is not None:
                messages.append({"role": "assistant", "content": assistant_msg})
    return messages


def predict(message, history):
    """Stream a chat response from the Hugging Face Inference API.

    :param message: the user's current message (str).
    :param history: conversation history; either ``[[user, assistant], ...]``
        pairs or a list of ``{"role", "content"}`` dicts (both Gradio formats
        are supported).
    :return: a generator yielding the accumulated response text after each
        received token (Gradio re-renders the growing string).
    :raises gr.Error: when the HF_TOKEN secret is not configured.
    """
    if not HF_TOKEN:
        raise gr.Error("Hugging Face API Token 未配置!请在Space的Secrets中添加 HF_TOKEN。")

    headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }

    # Full conversation: prior turns plus the current user message.
    messages = _history_to_messages(history)
    messages.append({"role": "user", "content": message})

    # NOTE(review): the classic text-generation endpoint expects "inputs" to
    # be a prompt string; passing a message list relies on the backend
    # accepting chat-style input — confirm against the deployed model's API.
    payload = {
        "inputs": messages,
        "parameters": {
            "max_new_tokens": 2048,
            "temperature": 0.7,
            "top_p": 0.95,
            "repetition_penalty": 1.1,
            "return_full_text": False,
        },
        "stream": True,
    }

    full_response = ""
    try:
        with requests.post(API_URL, headers=headers, json=payload, stream=True, timeout=120) as response:
            response.raise_for_status()

            # Server-sent events: one "data: {json}" frame per generated token.
            for line in response.iter_lines():
                if not line:
                    continue
                decoded_line = line.decode("utf-8")
                if not decoded_line.startswith("data:"):
                    continue
                try:
                    json_data = json.loads(decoded_line[len("data:"):])
                except json.JSONDecodeError:
                    # Skip non-JSON frames (e.g. keep-alives or "[DONE]").
                    continue
                token = json_data.get("token", {}).get("text", "")
                if token:
                    full_response += token
                    yield full_response
    except requests.exceptions.RequestException as e:
        # Network / HTTP errors: surface a friendly message in the chat UI.
        print(f"API请求错误: {e}")
        yield f"抱歉,与模型API通信时发生错误: {e}"
    except Exception as e:
        # Last-resort boundary handler so the UI never shows a raw traceback.
        print(f"发生未知错误: {e}")
        yield f"抱歉,发生了一个未知错误: {e}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Sample prompts shown under the input box; not pre-computed (cache off).
_EXAMPLE_PROMPTS = [["你好"], ["请用python写一个快速排序算法"], ["给我讲个笑话吧"]]

# Chat UI wired to the streaming predict() generator.
demo = gr.ChatInterface(
    predict,
    title="小Q老师 - 基础问答",
    description="与 badanwang/teacher_basic_qwen3-0.6b 模型进行流式对话。直接输入问题开始。",
    examples=_EXAMPLE_PROMPTS,
    cache_examples=False,
)
|
|
|
|
|
def _main():
    """Launch the Gradio app (blocking) when run as a script."""
    demo.launch()


if __name__ == "__main__":
    _main()