Spaces:
Runtime error
Runtime error
Upload 3 files
Browse files- README.md +18 -52
- app.py +42 -26
- requirements.txt +3 -2
README.md
CHANGED
|
@@ -1,52 +1,18 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
- **
|
| 19 |
-
- **性能调优**:提供 Spark 内存调优、Hive 倾斜优化、ClickHouse 查询加速建议。
|
| 20 |
-
- **运维脚本**:自动生成 Shell、Python、SQL 等运维脚本及监控逻辑。
|
| 21 |
-
- **日志分析**:粘贴 Error Stacktrace,快速定位 OOM、超时、权限等常见故障。
|
| 22 |
-
|
| 23 |
-
## 🛠 部署架构
|
| 24 |
-
- **模型格式**:GGUF (Q4_K_M 量化版本)。
|
| 25 |
-
- **后端框架**:llama-cpp-python + FastAPI。
|
| 26 |
-
- **前端界面**:Gradio。
|
| 27 |
-
- **硬件环境**:Hugging Face CPU Basic (2 vCPU / 16GB RAM)。
|
| 28 |
-
|
| 29 |
-
---
|
| 30 |
-
|
| 31 |
-
## 🔌 Dify 接入指南 (API Mode)
|
| 32 |
-
|
| 33 |
-
本 Space 已经过优化,支持 OpenAI 兼容接口,可无缝对接 Dify 等大模型应用开发平台。
|
| 34 |
-
|
| 35 |
-
### 1. 获取 API 信息
|
| 36 |
-
- **API Endpoint**: `https://coco1990-bigdata-ops-copilot.hf.space/v1`
|
| 37 |
-
- **API Key**: 你的 Hugging Face Access Token
|
| 38 |
-
- **模型名称**: `qwen2.5-coder-7b`
|
| 39 |
-
|
| 40 |
-
### 2. Dify 配置步骤
|
| 41 |
-
1. 进入 Dify **设置 -> 模型供应商**。
|
| 42 |
-
2. 添加 **OpenAI-API-compatible** 类型供应商。
|
| 43 |
-
3. 填入上述 Endpoint 和 Key 即可完成连接。
|
| 44 |
-
|
| 45 |
-
> **⚠️ 注意事项**:
|
| 46 |
-
> - **自动休眠**:免费版 Space 在长时间无人访问后会进入休眠状态。若 Dify 调用失败,请手动访问本页面唤醒。
|
| 47 |
-
> - **推理速度**:由于运行在 CPU 环境,首字响应可能存在 5-10 秒延迟,请在 Dify 中适当调大超时时间。
|
| 48 |
-
|
| 49 |
-
---
|
| 50 |
-
|
| 51 |
-
## 📜 免责声明
|
| 52 |
-
本模型提供的建议仅供参考,在生产环境执行任何脚本或配置更改前,请务必在测试环境验证。
|
|
|
|
| 1 |
+
title: BigData Ops Copilot
|
| 2 |
+
emoji: 🐠
|
| 3 |
+
colorFrom: blue
|
| 4 |
+
colorTo: yellow
|
| 5 |
+
sdk: gradio
|
| 6 |
+
sdk_version: 6.3.0
|
| 7 |
+
app_file: app.py
|
| 8 |
+
pinned: false
|
| 9 |
+
license: apache-2.0
|
| 10 |
+
|
| 11 |
+
# 🚀 BigData Ops Copilot (Lightweight)
|
| 12 |
+
|
| 13 |
+
这是经过优化的轻量化版本,旨在解决 Hugging Face 免费 CPU 算力下编译缓慢的问题。
|
| 14 |
+
|
| 15 |
+
## 🔌 Dify 连接参数
|
| 16 |
+
- **Endpoint**: `https://coco1990-bigdata-ops-copilot.hf.space/v1`
|
| 17 |
+
- **API Key**: 使用你的 HF Access Token
|
| 18 |
+
- **Model Name**: `qwen2.5-coder-7b`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app.py
CHANGED
|
@@ -1,49 +1,65 @@
|
|
| 1 |
import os
|
|
|
|
| 2 |
import gradio as gr
|
| 3 |
from llama_cpp import Llama
|
| 4 |
-
from llama_cpp.server.app import create_app
|
| 5 |
from huggingface_hub import hf_hub_download
|
|
|
|
|
|
|
| 6 |
import uvicorn
|
| 7 |
|
| 8 |
-
# 1.
|
|
|
|
|
|
|
|
|
|
| 9 |
model_path = hf_hub_download(
|
| 10 |
repo_id="Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
|
| 11 |
filename="qwen2.5-coder-7b-instruct-q4_k_m.gguf"
|
| 12 |
)
|
| 13 |
|
| 14 |
-
# 2. 初始化模型核心
|
| 15 |
-
# 注意:为了 API 性能,我们将 n_ctx 保持在 4096
|
| 16 |
llm = Llama(
|
| 17 |
model_path=model_path,
|
| 18 |
n_ctx=4096,
|
| 19 |
-
n_threads=2
|
|
|
|
| 20 |
)
|
| 21 |
|
| 22 |
-
# 3.
|
| 23 |
-
app
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
messages
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
delta = chunk['choices'][0]['delta']
|
| 37 |
-
if 'content' in delta:
|
| 38 |
-
response_text += delta['content']
|
| 39 |
-
yield response_text
|
| 40 |
|
| 41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
-
# 5.
|
| 44 |
-
# 这样你访问 URL 时看到的是 UI,而 Dify 访问 /v1 时调用的是 API
|
| 45 |
app = gr.mount_gradio_app(app, demo, path="/")
|
| 46 |
|
| 47 |
if __name__ == "__main__":
|
| 48 |
-
# Hugging Face 指定必须监听 7860 端口
|
| 49 |
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
|
| 1 |
# --- Application bootstrap ---------------------------------------------------
import os
import time

import gradio as gr
import uvicorn
from fastapi import FastAPI, Request
from fastapi.responses import JSONResponse
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# 1. FastAPI application; the Gradio UI is mounted onto it later so that
#    "/" serves the chat page while "/v1/*" serves the API.
app = FastAPI()

# 2. Fetch the GGUF weights. Q4_K_M quantization is chosen to balance
#    memory footprint and output quality on the free CPU tier.
model_path = hf_hub_download(
    repo_id="Qwen/Qwen2.5-Coder-7B-Instruct-GGUF",
    filename="qwen2.5-coder-7b-instruct-q4_k_m.gguf",
)

# Load the model once at startup. n_ctx=4096 keeps the context window (and
# KV cache) modest; n_threads=2 matches the 2 vCPU host; verbose=False
# silences llama.cpp's per-token logging.
llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_threads=2,
    verbose=False,
)
|
| 25 |
|
| 26 |
+
# 3. Hand-rolled OpenAI-compatible endpoint (consumed by Dify).
@app.post("/v1/chat/completions")
async def chat_completions(request: Request):
    """Minimal OpenAI-compatible ``/v1/chat/completions`` endpoint.

    Reads an OpenAI-style JSON payload from the request, forwards the
    conversation to the local llama-cpp model, and returns the completion
    dict unchanged (llama-cpp already emits the OpenAI response schema).

    Robustness fixes over the naive version:
    - ``body.get("temperature", 0.3)`` yields ``None`` when a client sends
      an explicit ``"temperature": null`` for an unset field, which would
      crash llama-cpp's sampler — ``None`` is coalesced to the default.
      Same for ``max_tokens``.
    - An empty or missing ``messages`` list is rejected with an
      OpenAI-style 400 error body instead of surfacing as a 500.
    """
    body = await request.json()

    messages = body.get("messages") or []
    if not messages:
        return JSONResponse(
            status_code=400,
            content={
                "error": {
                    "message": "messages must be a non-empty list",
                    "type": "invalid_request_error",
                }
            },
        )

    # Coalesce explicit JSON nulls to sane defaults before handing off.
    temperature = body.get("temperature")
    max_tokens = body.get("max_tokens")

    response = llm.create_chat_completion(
        messages=messages,
        temperature=0.3 if temperature is None else temperature,
        max_tokens=1024 if max_tokens is None else max_tokens,
        # CPU tier: a single blocking response keeps the handler simple;
        # the client's "stream" flag is intentionally not honored.
        stream=False,
    )

    # llama-cpp's return dict mirrors the OpenAI response structure, so it
    # can be serialized as-is.
    return JSONResponse(content=response)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
+
# 4. Gradio 交互界面逻辑 (供手动调试)
|
| 44 |
+
def predict(message, history):
|
| 45 |
+
system_prompt = "你是一位资深大数据运维专家。请提供专业、安全、高效的脚本和调优建议。"
|
| 46 |
+
msgs = [{"role": "system", "content": system_prompt}]
|
| 47 |
+
for h in history:
|
| 48 |
+
msgs.append({"role": "user", "content": h[0]})
|
| 49 |
+
msgs.append({"role": "assistant", "content": h[1]})
|
| 50 |
+
msgs.append({"role": "user", "content": message})
|
| 51 |
+
|
| 52 |
+
output = llm.create_chat_completion(messages=msgs)
|
| 53 |
+
return output["choices"][0]["message"]["content"]
|
| 54 |
+
|
| 55 |
+
# Debug front-end: a minimal chat page so the Space stays interactive for
# humans while Dify talks to the /v1 API underneath.
demo = gr.ChatInterface(
    fn=predict,
    title="BigData Ops Copilot (Lightweight Mode)",
    description="免编译轻量化版 - 支持 Dify 接入",
)

# 5. Mount the Gradio app onto FastAPI: "/" serves the UI, while the
#    FastAPI routes (e.g. /v1/chat/completions) remain reachable.
app = gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    # Hugging Face Spaces routes inbound traffic to port 7860.
    uvicorn.run(app, host="0.0.0.0", port=7860)
|
requirements.txt
CHANGED
|
@@ -1,5 +1,6 @@
|
|
| 1 |
-
llama-cpp-python
|
| 2 |
gradio
|
| 3 |
huggingface_hub
|
| 4 |
fastapi
|
| 5 |
-
uvicorn
|
|
|
|
|
|
| 1 |
+
llama-cpp-python
|
| 2 |
gradio
|
| 3 |
huggingface_hub
|
| 4 |
fastapi
|
| 5 |
+
uvicorn
|
| 6 |
+
pydantic
|