kun2333 committed
Commit 1a72434 · verified · Parent(s): c46d8a9

Upload 4 files

Files changed (4):
  1. Dockerfile +38 -0
  2. README.md +97 -5
  3. app.py +626 -0
  4. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,38 @@
+ # Use the official Python runtime as the parent image
+ FROM python:3.11-slim
+
+ # Set the working directory
+ WORKDIR /app
+
+ # Set environment variables
+ ENV PYTHONUNBUFFERED=1
+ ENV PYTHONDONTWRITEBYTECODE=1
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     curl \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy the requirements file
+ COPY requirements.txt .
+
+ # Install Python dependencies
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the application code
+ COPY app.py .
+
+ # Create a non-root user
+ RUN useradd --create-home --shell /bin/bash app \
+     && chown -R app:app /app
+ USER app
+
+ # Expose the port
+ EXPOSE 7860
+
+ # Health check
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
+     CMD curl -f http://localhost:7860/health || exit 1
+
+ # Startup command
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,11 +1,103 @@
  ---
- title: Anything Llm
- emoji: 💻
- colorFrom: gray
- colorTo: yellow
+ title: AnythingLLM OpenAI API Proxy
+ emoji: 🔄
+ colorFrom: blue
+ colorTo: green
  sdk: docker
  pinned: false
  license: mit
+ app_port: 7860
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # AnythingLLM OpenAI API Proxy
+
+ A forwarding service that converts the AnythingLLM API to the OpenAI API format, deployed on Hugging Face Spaces.
+
+ ## Features
+
+ - ✅ Fully compatible with the OpenAI API format
+ - ✅ Supports streaming and non-streaming responses
+ - ✅ Automatically creates new conversation threads
+ - ✅ Smart token rotation and error handling
+ - ✅ Client API key authentication
+ - ✅ Debug mode support
+ - ✅ Optimized for Hugging Face Spaces deployment
+
+ ## API Endpoints
+
+ ### Chat completions
+ ```
+ POST /v1/chat/completions
+ ```
+
+ ### Model list
+ ```
+ GET /v1/models
+ ```
+
+ ### Health check
+ ```
+ GET /health
+ ```
+
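+ For example, you can verify a deployed Space by polling `/health` (a minimal
+ sketch; the Space URL is a placeholder, and the `"healthy"` status value
+ matches what `app.py` returns):
+
+ ```python
+ import requests
+
+ # Hypothetical Space URL -- replace with your own deployment
+ BASE = "https://your-space-name-your-username.hf.space"
+
+ resp = requests.get(f"{BASE}/health", timeout=10)
+ resp.raise_for_status()
+ print(resp.json()["status"])  # prints "healthy" when the proxy is up
+ ```
+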
+ ## Usage Examples
+
+ ### Python (OpenAI library)
+
+ ```python
+ from openai import OpenAI
+
+ client = OpenAI(
+     api_key="your-api-key",
+     base_url="https://your-space-name-your-username.hf.space/v1"
+ )
+
+ response = client.chat.completions.create(
+     model="anythingllm",
+     messages=[
+         {"role": "user", "content": "Hello, please introduce yourself"}
+     ],
+     stream=True
+ )
+
+ for chunk in response:
+     if chunk.choices[0].delta.content:
+         print(chunk.choices[0].delta.content, end="")
+ ```
+
+ ### cURL
+
+ ```bash
+ curl -X POST "https://your-space-name-your-username.hf.space/v1/chat/completions" \
+   -H "Content-Type: application/json" \
+   -H "Authorization: Bearer your-api-key" \
+   -d '{
+     "model": "anythingllm",
+     "messages": [
+       {"role": "user", "content": "Hello, please introduce yourself"}
+     ],
+     "stream": false
+   }'
+ ```
+
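+ A non-streaming call returns a standard OpenAI-style completion object. The
+ shape below follows this proxy's `ChatCompletionResponse` model (values are
+ illustrative; note that the proxy currently reports zeroed `usage` counts):
+
+ ```json
+ {
+   "id": "chatcmpl-<hex>",
+   "object": "chat.completion",
+   "created": 1700000000,
+   "model": "anythingllm",
+   "choices": [
+     {
+       "message": {"role": "assistant", "content": "..."},
+       "index": 0,
+       "finish_reason": "stop"
+     }
+   ],
+   "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+ }
+ ```
+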
+ ## Environment Variable Configuration
+
+ Configure the following environment variables in the Hugging Face Spaces settings:
+
+ - `ANYTHINGLLM_BASE_URL`: base URL of the AnythingLLM instance
+ - `ANYTHINGLLM_WORKSPACE`: workspace name
+ - `BEARER_TOKEN`: Bearer token for AnythingLLM
+ - `CLIENT_API_KEYS`: client API keys (comma-separated)
+ - `DEBUG`: whether to enable debug mode (true/false)
+ - `REQUEST_TIMEOUT`: request timeout in seconds
+
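+ Every variable is optional: `HFConfig` in `app.py` falls back to built-in
+ defaults. A minimal sketch for a local test run (placeholder values; the
+ variables must be set before `app.py` is imported, since `HFConfig` reads
+ them at import time):
+
+ ```python
+ import os
+
+ # Placeholder values -- substitute your own deployment details
+ os.environ["ANYTHINGLLM_BASE_URL"] = "https://your-anythingllm-host:3001"
+ os.environ["ANYTHINGLLM_WORKSPACE"] = "your-workspace"
+ os.environ["BEARER_TOKEN"] = "your-anythingllm-bearer-token"
+ os.environ["CLIENT_API_KEYS"] = "sk-key-one,sk-key-two"
+ os.environ["DEBUG"] = "true"
+ os.environ["REQUEST_TIMEOUT"] = "60"
+
+ import app  # HFConfig now picks up the values above
+ ```
+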
+ ## Supported Models
+
+ - `claude-3-7-sonnet`
+ - `claude-sonnet-4`
+ - `deepseek-chat`
+ - `anythingllm`
+
+ ## License
+
+ MIT License
app.py ADDED
@@ -0,0 +1,626 @@
+ #!/usr/bin/env python3
+ """
+ AnythingLLM to OpenAI API Proxy - Hugging Face Spaces edition
+ Based on the full forwarding service, adapted for Hugging Face Spaces deployment
+ """
+
+ import json
+ import os
+ import time
+ import uuid
+ import logging
+ import threading
+ from typing import Any, Dict, List, Optional, TypedDict, Union
+
+ import requests
+ from fastapi import FastAPI, HTTPException, Depends, Query, Request
+ from fastapi.responses import StreamingResponse, JSONResponse
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel, Field
+ import uvicorn
+
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+
+ # AnythingLLM token management
+ class AnythingLLMToken(TypedDict):
+     token: str
+     base_url: str
+     workspace: str
+     is_valid: bool
+     last_used: float
+     error_count: int
+
+
+ # Hugging Face Spaces configuration
+ class HFConfig:
+     # Read configuration from environment variables
+     ANYTHINGLLM_BASE_URL = os.getenv("ANYTHINGLLM_BASE_URL", "https://ai.renpho.nl:3002")
+     ANYTHINGLLM_WORKSPACE = os.getenv("ANYTHINGLLM_WORKSPACE", "liufuwei")
+     BEARER_TOKEN = os.getenv("BEARER_TOKEN", "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6ODYsInVzZXJuYW1lIjoibGl1ZnV3ZWkiLCJpYXQiOjE3NTA3NTAwNjksImV4cCI6MjA2NjM2OTI2OX0.TVLxzR_Uleed1SFgZoa9D3i5pkxYHO24e_Q_vYo2iZA")
+     CLIENT_API_KEYS = os.getenv("CLIENT_API_KEYS", "sk-anythingllm-hf-spaces-default").split(",")
+     DEBUG = os.getenv("DEBUG", "false").lower() == "true"
+     REQUEST_TIMEOUT = int(os.getenv("REQUEST_TIMEOUT", "60"))
+
+
+ # Global state
+ VALID_CLIENT_KEYS: set = set(HFConfig.CLIENT_API_KEYS)
+ ANYTHINGLLM_TOKENS: List[AnythingLLMToken] = []
+ ANYTHINGLLM_MODELS: List[str] = ["claude-3-7-sonnet", "claude-sonnet-4", "deepseek-chat", "anythingllm"]
+ token_rotation_lock = threading.Lock()
+ MAX_ERROR_COUNT = 3
+ ERROR_COOLDOWN = 300
+ DEBUG_MODE = HFConfig.DEBUG
+
+
+ # Pydantic models
+ class ChatMessage(BaseModel):
+     role: str
+     content: Union[str, List[Dict[str, Any]]]
+     reasoning_content: Optional[str] = None
+
+
+ class ChatCompletionRequest(BaseModel):
+     model: str
+     messages: List[ChatMessage]
+     stream: bool = True
+     temperature: Optional[float] = None
+     max_tokens: Optional[int] = None
+     top_p: Optional[float] = None
+
+
+ class ModelInfo(BaseModel):
+     id: str
+     object: str = "model"
+     created: int
+     owned_by: str
+
+
+ class ModelList(BaseModel):
+     object: str = "list"
+     data: List[ModelInfo]
+
+
+ class ChatCompletionChoice(BaseModel):
+     message: ChatMessage
+     index: int = 0
+     finish_reason: str = "stop"
+
+
+ class ChatCompletionResponse(BaseModel):
+     id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4().hex}")
+     object: str = "chat.completion"
+     created: int = Field(default_factory=lambda: int(time.time()))
+     model: str
+     choices: List[ChatCompletionChoice]
+     usage: Dict[str, int] = Field(
+         default_factory=lambda: {
+             "prompt_tokens": 0,
+             "completion_tokens": 0,
+             "total_tokens": 0,
+         }
+     )
+
+
+ class StreamChoice(BaseModel):
+     delta: Dict[str, Any] = Field(default_factory=dict)
+     index: int = 0
+     finish_reason: Optional[str] = None
+
+
+ class StreamResponse(BaseModel):
+     id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4().hex}")
+     object: str = "chat.completion.chunk"
+     created: int = Field(default_factory=lambda: int(time.time()))
+     model: str
+     choices: List[StreamChoice]
+
+
+ # FastAPI app
+ app = FastAPI(
+     title="AnythingLLM OpenAI API Adapter - HF Spaces",
+     description="Converts AnythingLLM API to OpenAI-compatible format for Hugging Face Spaces",
+     version="1.0.0-hf"
+ )
+
+ # Add CORS middleware
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
+     allow_headers=["*"],
+ )
+
+ security = HTTPBearer(auto_error=False)
+
+
+ def log_debug(message: str):
+     """Debug logging helper"""
+     if DEBUG_MODE:
+         logger.info(f"[DEBUG] {message}")
+
+
+ def initialize_anythingllm_tokens():
+     """Initialize the AnythingLLM tokens"""
+     global ANYTHINGLLM_TOKENS
+     ANYTHINGLLM_TOKENS = [{
+         "token": HFConfig.BEARER_TOKEN,
+         "base_url": HFConfig.ANYTHINGLLM_BASE_URL,
+         "workspace": HFConfig.ANYTHINGLLM_WORKSPACE,
+         "is_valid": True,
+         "last_used": 0,
+         "error_count": 0
+     }]
+     logger.info(f"Initialized AnythingLLM tokens: {len(ANYTHINGLLM_TOKENS)}")
+
+
+ def get_best_anythingllm_token() -> Optional[AnythingLLMToken]:
+     """Get the best available AnythingLLM token"""
+     with token_rotation_lock:
+         now = time.time()
+         valid_tokens = [
+             token for token in ANYTHINGLLM_TOKENS
+             if token["is_valid"] and (
+                 token["error_count"] < MAX_ERROR_COUNT or
+                 now - token["last_used"] > ERROR_COOLDOWN
+             )
+         ]
+
+         if not valid_tokens:
+             return None
+
+         # Reset error count for tokens that have been in cooldown
+         for token in valid_tokens:
+             if token["error_count"] >= MAX_ERROR_COUNT and now - token["last_used"] > ERROR_COOLDOWN:
+                 token["error_count"] = 0
+
+         # Sort by last used (oldest first) and error count (lowest first)
+         valid_tokens.sort(key=lambda x: (x["last_used"], x["error_count"]))
+         token = valid_tokens[0]
+         token["last_used"] = now
+         return token
+
+
+ def _convert_messages_to_anythingllm_format(messages: List[ChatMessage]) -> str:
+     """Convert the OpenAI messages format to an AnythingLLM message.
+
+     Note: only the most recent user message is forwarded; earlier
+     conversation history is not sent to AnythingLLM.
+     """
+     if not messages:
+         return ""
+
+     # Get the last user message as the current message
+     last_user_msg = None
+     for msg in reversed(messages):
+         if msg.role == "user":
+             last_user_msg = msg
+             break
+
+     if not last_user_msg:
+         raise HTTPException(status_code=400, detail="No user message found in the conversation.")
+
+     message = last_user_msg.content if isinstance(last_user_msg.content, str) else ""
+     return message
+
+
+ async def authenticate_client(
+     auth: Optional[HTTPAuthorizationCredentials] = Depends(security),
+ ):
+     """Authenticate client based on API key in Authorization header"""
+     if not VALID_CLIENT_KEYS:
+         # On HF Spaces, skip authentication when no client keys are configured
+         return
+
+     if not auth or not auth.credentials:
+         raise HTTPException(
+             status_code=401,
+             detail="API key required in Authorization header.",
+             headers={"WWW-Authenticate": "Bearer"},
+         )
+
+     if auth.credentials not in VALID_CLIENT_KEYS:
+         raise HTTPException(status_code=403, detail="Invalid client API key.")
+
+
+ def decode_text_content(text):
+     """Repair mojibake in text content.
+
+     Upstream responses occasionally arrive as UTF-8 bytes mis-decoded as
+     Latin-1; round-tripping through latin-1 restores the original UTF-8
+     (e.g. a garbled sequence like "ä½ å¥½" decodes back to "你好").
+     Text that fails the round trip is returned unchanged.
+     """
+     if not text:
+         return text
+     try:
+         if isinstance(text, str):
+             try:
+                 decoded = text.encode('latin-1').decode('utf-8')
+                 return decoded
+             except (UnicodeEncodeError, UnicodeDecodeError):
+                 return text
+         return text
+     except Exception:
+         return text
+
+
+ @app.on_event("startup")
+ async def startup():
+     """Initialize configuration at application startup"""
+     logger.info("Starting AnythingLLM OpenAI API Adapter for Hugging Face Spaces...")
+     initialize_anythingllm_tokens()
+     logger.info("Server initialization completed.")
+
+
+ def get_models_list_response() -> ModelList:
+     """Helper to construct ModelList response from cached models"""
+     model_infos = [
+         ModelInfo(
+             id=model,
+             created=int(time.time()),
+             owned_by="anythingllm"
+         )
+         for model in ANYTHINGLLM_MODELS
+     ]
+     return ModelList(data=model_infos)
+
+
+ def create_new_thread(token: AnythingLLMToken) -> Optional[str]:
+     """Create a new thread and return thread ID"""
+     try:
+         url = f"{token['base_url']}/api/workspace/{token['workspace']}/thread/new"
+         headers = {
+             "Authorization": f"Bearer {token['token']}",
+             "Accept": "*/*",
+             "Content-Type": "application/json",
+         }
+
+         response = requests.post(url, headers=headers, timeout=30)
+         response.raise_for_status()
+
+         data = response.json()
+         thread_slug = data.get("thread", {}).get("slug")
+         log_debug(f"Created new thread: {thread_slug}")
+         return thread_slug
+
+     except Exception as e:
+         log_debug(f"Failed to create new thread: {e}")
+         return None
+
+
+ def _anythingllm_stream_generator(response, model: str):
+     """Real-time streaming with format conversion - AnythingLLM to OpenAI"""
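+     # The upstream SSE events parsed below look roughly like this (shape
+     # inferred from the handling logic; field values are illustrative):
+     #   data: {"type": "textResponseChunk", "textResponse": "text so far", "close": false}
+     #   data: {"type": "finalizeResponseStream", "close": true}
+     # The delta computation treats "textResponse" as cumulative text and diffs
+     # it against full_content to emit OpenAI-style incremental deltas.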
+     stream_id = f"chatcmpl-{uuid.uuid4().hex}"
+     created_time = int(time.time())
+
+     # Send the initial role delta
+     yield f"data: {StreamResponse(id=stream_id, created=created_time, model=model, choices=[StreamChoice(delta={'role': 'assistant'})]).model_dump_json()}\n\n"
+
+     buffer = ""
+     full_content = ""
+
+     try:
+         for chunk in response.iter_content(chunk_size=1024):
+             if not chunk:
+                 continue
+
+             chunk_text = chunk.decode("utf-8")
+             log_debug(f"Received chunk: {chunk_text[:100]}..." if len(chunk_text) > 100 else chunk_text)
+             buffer += chunk_text
+
+             # Process complete event blocks in the buffer
+             while "\n\n" in buffer:
+                 event_data, buffer = buffer.split("\n\n", 1)
+                 event_data = event_data.strip()
+
+                 if not event_data or not event_data.startswith("data: "):
+                     continue
+
+                 try:
+                     # Parse the AnythingLLM SSE payload
+                     json_data = json.loads(event_data[6:])  # Remove "data: " prefix
+                     log_debug(f"Parsed JSON: {json_data}")
+
+                     if json_data.get("type") == "textResponseChunk":
+                         text_response = json_data.get("textResponse", "")
+                         if text_response:
+                             # Decode the text content
+                             decoded_text = decode_text_content(text_response)
+
+                             # Compute the incremental delta
+                             if decoded_text.startswith(full_content):
+                                 delta = decoded_text[len(full_content):]
+                                 full_content = decoded_text
+
+                                 if delta:
+                                     openai_response = StreamResponse(
+                                         id=stream_id,
+                                         created=created_time,
+                                         model=model,
+                                         choices=[StreamChoice(delta={"content": delta})],
+                                     )
+                                     yield f"data: {openai_response.model_dump_json()}\n\n"
+
+                     # Check whether the stream has ended
+                     if json_data.get("close", False):
+                         log_debug("Received close signal.")
+                         openai_response = StreamResponse(
+                             id=stream_id,
+                             created=created_time,
+                             model=model,
+                             choices=[StreamChoice(delta={}, finish_reason="stop")],
+                         )
+                         yield f"data: {openai_response.model_dump_json()}\n\n"
+                         yield "data: [DONE]\n\n"
+                         return
+
+                     elif json_data.get("type") == "finalizeResponseStream":
+                         log_debug("Received finalize signal.")
+                         openai_response = StreamResponse(
+                             id=stream_id,
+                             created=created_time,
+                             model=model,
+                             choices=[StreamChoice(delta={}, finish_reason="stop")],
+                         )
+                         yield f"data: {openai_response.model_dump_json()}\n\n"
+                         yield "data: [DONE]\n\n"
+                         return
+
+                 except json.JSONDecodeError as e:
+                     log_debug(f"Failed to parse JSON: {e}")
+                     continue
+
+     except Exception as e:
+         log_debug(f"Stream processing error: {e}")
+         yield f"data: {json.dumps({'error': str(e)})}\n\n"
+
+     # If the stream was interrupted unexpectedly, still send a termination signal
+     log_debug("Stream finished unexpectedly, sending completion signal.")
+     yield f"data: {StreamResponse(id=stream_id, created=created_time, model=model, choices=[StreamChoice(delta={}, finish_reason='stop')]).model_dump_json()}\n\n"
+     yield "data: [DONE]\n\n"
+
+
+ def _build_anythingllm_non_stream_response(response, model: str) -> ChatCompletionResponse:
+     """Build non-streaming response by accumulating stream data"""
+     full_content = ""
+     buffer = ""
+
+     for chunk in response.iter_content(chunk_size=1024):
+         if not chunk:
+             continue
+
+         buffer += chunk.decode("utf-8")
+
+         # Process complete event blocks in the buffer
+         while "\n\n" in buffer:
+             event_data, buffer = buffer.split("\n\n", 1)
+             event_data = event_data.strip()
+
+             if not event_data or not event_data.startswith("data: "):
+                 continue
+
+             try:
+                 json_data = json.loads(event_data[6:])
+
+                 if json_data.get("type") == "textResponseChunk":
+                     text_response = json_data.get("textResponse", "")
+                     if text_response:
+                         full_content = decode_text_content(text_response)
+
+                 if json_data.get("close", False):
+                     return ChatCompletionResponse(
+                         model=model,
+                         choices=[
+                             ChatCompletionChoice(
+                                 message=ChatMessage(
+                                     role="assistant",
+                                     content=full_content
+                                 )
+                             )
+                         ],
+                     )
+
+                 elif json_data.get("type") == "finalizeResponseStream":
+                     return ChatCompletionResponse(
+                         model=model,
+                         choices=[
+                             ChatCompletionChoice(
+                                 message=ChatMessage(
+                                     role="assistant",
+                                     content=full_content
+                                 )
+                             )
+                         ],
+                     )
+
+             except json.JSONDecodeError:
+                 continue
+
+     # If the loop finishes without an explicit close/finalize event
+     return ChatCompletionResponse(
+         model=model,
+         choices=[
+             ChatCompletionChoice(
+                 message=ChatMessage(
+                     role="assistant",
+                     content=full_content
+                 )
+             )
+         ],
+     )
+
+
+ @app.get("/v1/models", response_model=ModelList)
+ async def list_v1_models(_: None = Depends(authenticate_client)):
+     """List available models - authenticated"""
+     return get_models_list_response()
+
+
+ @app.get("/models", response_model=ModelList)
+ async def list_models_no_auth():
+     """List available models without authentication - for client compatibility"""
+     return get_models_list_response()
+
+
+ @app.get("/debug")
+ async def toggle_debug(enable: bool = Query(None)):
+     """Toggle debug mode"""
+     global DEBUG_MODE
+     if enable is not None:
+         DEBUG_MODE = enable
+     return {"debug_mode": DEBUG_MODE}
+
+
+ @app.post("/v1/chat/completions")
+ async def chat_completions(
+     request: ChatCompletionRequest, _: None = Depends(authenticate_client)
+ ):
+     """Create chat completion using AnythingLLM backend"""
+     if request.model not in ANYTHINGLLM_MODELS:
+         raise HTTPException(status_code=404, detail=f"Model '{request.model}' not found.")
+
+     if not request.messages:
+         raise HTTPException(status_code=400, detail="No messages provided in the request.")
+
+     log_debug(f"Processing request for model: {request.model}")
+
+     # Convert the message format
+     try:
+         message = _convert_messages_to_anythingllm_format(request.messages)
+     except Exception as e:
+         raise HTTPException(status_code=400, detail=f"Failed to process messages: {str(e)}")
+
+     # Try every token; the extra iteration raises 503 once all tokens have failed
+     for attempt in range(len(ANYTHINGLLM_TOKENS) + 1):
+         if attempt == len(ANYTHINGLLM_TOKENS):
+             raise HTTPException(
+                 status_code=503,
+                 detail="All attempts to contact AnythingLLM API failed."
+             )
+
+         token = get_best_anythingllm_token()
+         if not token:
+             raise HTTPException(
+                 status_code=503,
+                 detail="No valid AnythingLLM tokens available."
+             )
+
+         try:
+             # Create a new thread
+             thread_id = create_new_thread(token)
+             if not thread_id:
+                 raise Exception("Failed to create new thread")
+
+             # Build the chat request
+             url = f"{token['base_url']}/api/workspace/{token['workspace']}/thread/{thread_id}/stream-chat"
+
+             payload = {
+                 "message": message,
+                 "attachments": []
+             }
+
+             headers = {
+                 "Accept": "text/event-stream",
+                 "Authorization": f"Bearer {token['token']}",
+                 "Content-Type": "text/plain;charset=UTF-8",
+             }
+
+             log_debug(f"Sending request to AnythingLLM API: {url}")
+
+             response = requests.post(
+                 url,
+                 data=json.dumps(payload),
+                 headers=headers,
+                 stream=True,
+                 timeout=HFConfig.REQUEST_TIMEOUT,
+             )
+             response.raise_for_status()
+
+             if request.stream:
+                 log_debug("Returning stream response")
+                 return StreamingResponse(
+                     _anythingllm_stream_generator(response, request.model),
+                     media_type="text/event-stream",
+                     headers={
+                         "Cache-Control": "no-cache",
+                         "Connection": "keep-alive",
+                         "X-Accel-Buffering": "no",
+                     },
+                 )
+             else:
+                 log_debug("Building non-stream response")
+                 return _build_anythingllm_non_stream_response(response, request.model)
+
+         except requests.HTTPError as e:
+             status_code = getattr(e.response, "status_code", 500)
+             error_detail = getattr(e.response, "text", str(e))
+             log_debug(f"AnythingLLM API error ({status_code}): {error_detail}")
+
+             with token_rotation_lock:
+                 if status_code in [401, 403]:
+                     # Mark the token as invalid
+                     token["is_valid"] = False
+                     logger.warning(f"Token ...{token['token'][-4:]} marked as invalid due to auth error.")
+                 elif status_code in [429, 500, 502, 503, 504]:
+                     # Increment the error count
+                     token["error_count"] += 1
+                     logger.warning(f"Token ...{token['token'][-4:]} error count: {token['error_count']}")
+
+         except Exception as e:
+             log_debug(f"Request error: {e}")
+             with token_rotation_lock:
+                 token["error_count"] += 1
+
+
+ @app.get("/health")
+ async def health_check():
+     """Health check"""
+     return JSONResponse(content={
+         "status": "healthy",
+         "service": "anythingllm-to-openai-proxy-hf",
+         "version": "1.0.0-hf",
+         "timestamp": int(time.time()),
+         "config": {
+             "base_url": HFConfig.ANYTHINGLLM_BASE_URL,
+             "workspace": HFConfig.ANYTHINGLLM_WORKSPACE,
+             "debug": DEBUG_MODE,
+             "models": ANYTHINGLLM_MODELS
+         }
+     })
+
+
+ @app.get("/")
+ async def root():
+     """API root path"""
+     return JSONResponse(content={
+         "service": "AnythingLLM to OpenAI API Proxy",
+         "version": "1.0.0-hf",
+         "platform": "Hugging Face Spaces",
+         "endpoints": {
+             "chat_completions": "/v1/chat/completions",
+             "models": "/v1/models",
+             "health": "/health",
+             "debug": "/debug"
+         },
+         "description": "Converts AnythingLLM API to OpenAI-compatible format",
+         "usage": {
+             "example": {
+                 "url": "/v1/chat/completions",
+                 "method": "POST",
+                 "headers": {
+                     "Authorization": "Bearer your-api-key",
+                     "Content-Type": "application/json"
+                 },
+                 "body": {
+                     "model": "anythingllm",
+                     "messages": [{"role": "user", "content": "Hello"}],
+                     "stream": False
+                 }
+             }
+         }
+     })
+
+
+ if __name__ == "__main__":
+     uvicorn.run(
+         app,
+         host="0.0.0.0",
+         port=7860,  # Default port on Hugging Face Spaces
+         log_level="info"
+     )
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ fastapi==0.104.1
+ uvicorn[standard]==0.24.0
+ requests==2.31.0
+ pydantic==2.5.0
+ python-multipart==0.0.6