Hugging Face Spaces app (build status: Runtime error)
import os
import time
import gradio as gr
from llama_cpp import Llama
from huggingface_hub import hf_hub_download
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
import uvicorn

# 1. Initialize the FastAPI app that hosts both the API and the debug UI.
app = FastAPI()

# 2. Download and load the model.
#    Q4_K_M quantization is used to balance performance against memory.
model_path = hf_hub_download(
    repo_id="Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
    filename="qwen2.5-coder-7b-instruct-q4_k_m.gguf",
)
llm = Llama(
    model_path=model_path,
    n_ctx=4096,    # context window, in tokens
    n_threads=2,   # CPU inference threads
    verbose=False,
)
# 3. Hand-rolled OpenAI-compatible chat endpoint (consumed by Dify).
# FIX: the handler was defined but never registered with FastAPI — without
# the @app.post decorator the route does not exist and callers get a 404.
@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """Minimal OpenAI-style ``/v1/chat/completions`` endpoint.

    Reads an OpenAI-format JSON body, forwards its ``messages`` to the local
    llama.cpp model, and returns the model's response as JSON.
    (``create_chat_completion`` already produces an OpenAI-shaped dict.)
    """
    body = await request.json()
    messages = body.get("messages", [])
    # llama-cpp-python accepts OpenAI-style message dicts directly.
    response = llm.create_chat_completion(
        messages=messages,
        temperature=body.get("temperature", 0.3),
        max_tokens=body.get("max_tokens", 1024),
        stream=False,  # streaming responses are not implemented here
    )
    return JSONResponse(content=response)
# 4. Gradio chat callback (for manual debugging via the web UI).
def predict(message, history):
    """Answer one chat turn using the local model.

    Builds an OpenAI-style message list — system prompt, then the prior
    turns from *history* (Gradio (user, assistant) tuple pairs — assumed
    format, confirm against the installed Gradio version), then the new
    *message* — and returns the assistant's reply text.
    """
    system_prompt = "你是一位资深大数据运维专家。请提供专业、安全、高效的脚本和调优建议。"
    msgs = [{"role": "system", "content": system_prompt}]
    for user_turn, assistant_turn in history:
        msgs.append({"role": "user", "content": user_turn})
        msgs.append({"role": "assistant", "content": assistant_turn})
    msgs.append({"role": "user", "content": message})
    output = llm.create_chat_completion(messages=msgs)
    return output["choices"][0]["message"]["content"]
# 5. Build the Gradio debug UI, mount it onto the FastAPI app, serve both.
demo = gr.ChatInterface(
    fn=predict,
    title="BigData Ops Copilot (Lightweight Mode)",
    description="免编译轻量化版 - 支持 Dify 接入",
)

app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    # 7860 is the port Hugging Face Spaces expects; bind all interfaces.
    uvicorn.run(app, host="0.0.0.0", port=7860)