Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| from huggingface_hub import InferenceClient | |
# Identifier of the hosted model every request is sent to.
MODEL_ID = "meta-llama/Llama-3.2-3B-Instruct"

# Access token taken from the HF_TOKEN environment variable
# (None when the variable is not set).
hf_token = os.getenv("HF_TOKEN")

# Shared Hugging Face inference client bound to the model above.
client = InferenceClient(token=hf_token, model=MODEL_ID)
# 4. Chat logic: turn the Gradio history into chat-completion messages and
#    stream the model's reply back to the UI.
def _content_to_text(content):
    """Return a history entry's *content* as plain text.

    Strings pass through unchanged and ``None`` becomes ``""``. A list is
    treated as a sequence of parts: plain strings and dicts carrying a string
    under ``"text"`` are concatenated, anything else is skipped.
    """
    if content is None:
        return ""
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        # NOTE(review): some Gradio versions appear to deliver message content
        # as a list of {"type": "text", "text": ...} parts -- confirm against
        # the installed version.
        pieces = []
        for part in content:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                pieces.append(part["text"])
        return "".join(pieces)
    return str(content)


def chat_fn(message, history):
    """Stream the model's reply to *message* given the earlier *history*.

    Args:
        message: The new user message.
        history: Earlier turns. Each entry may be a ``[user, assistant]``
            pair or a ``{"role": ..., "content": ...}`` dict; both shapes are
            accepted so the handler does not depend on which history format
            the UI passes in. ``None`` is treated as an empty history.

    Yields:
        The accumulated reply text each time a non-empty token arrives. If
        the request or the stream raises, a single user-facing error string
        is yielded instead of propagating the exception.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Messages-style entry: keep its role, normalise its content.
            role = turn.get("role")
            text = _content_to_text(turn.get("content"))
            if role and text:
                messages.append({"role": role, "content": text})
            continue
        # Pair-style entry: [user_msg, assistant_msg]; either side may be empty.
        user_msg, assistant_msg = turn
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Append the message the user has just sent.
    messages.append({"role": "user", "content": message})

    response = ""
    try:
        # Call the streaming API.
        stream = client.chat_completion(
            messages=messages,
            max_tokens=1024,
            stream=True,
            temperature=0.7,
            top_p=0.9,
        )
        for chunk in stream:
            # A chunk without choices carries no text; indexing it would raise
            # IndexError and replace a good reply with the error message.
            if not chunk.choices:
                continue
            token = chunk.choices[0].delta.content
            if token:
                response += token
                yield response
    except Exception as e:
        # UI boundary: report the failure in the chat instead of crashing.
        yield f"⚠️ 出错了!可能是 API 暂时繁忙。错误信息: {e}"
# 5. Assemble the Gradio chat UI. The type="messages" argument was removed
#    because it raised an error.
demo = gr.ChatInterface(
    examples=["你好,请自我介绍一下。", "用 Python 写一个快速排序算法。"],
    description="基于 Hugging Face 免费 Serverless API 驱动,不占用本地硬件资源,速度极快!",
    # The title shows only the part of MODEL_ID after the last "/".
    title=f"🤖 我的专属大模型聊天室 ({MODEL_ID.rsplit('/', 1)[-1]})",
    fn=chat_fn,
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()