kun2333 committed
Commit 1a72434 · verified · Parent(s): c46d8a9

Upload 4 files

Files changed (4):
  1. Dockerfile +38 -0
  2. README.md +97 -5
  3. app.py +626 -0
  4. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,38 @@
+ # Use the official Python runtime as the parent image
+ FROM python:3.11-slim
+
+ # Set the working directory
+ WORKDIR /app
+
+ # Set environment variables
+ ENV PYTHONUNBUFFERED=1
+ ENV PYTHONDONTWRITEBYTECODE=1
+
+ # Install system dependencies
+ RUN apt-get update && apt-get install -y \
+     curl \
+     && rm -rf /var/lib/apt/lists/*
+
+ # Copy the requirements file
+ COPY requirements.txt .
+
+ # Install Python dependencies
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ # Copy the application code
+ COPY app.py .
+
+ # Create a non-root user
+ RUN useradd --create-home --shell /bin/bash app \
+     && chown -R app:app /app
+ USER app
+
+ # Expose the port
+ EXPOSE 7860
+
+ # Health check
+ HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
+     CMD curl -f http://localhost:7860/health || exit 1
+
+ # Startup command
+ CMD ["python", "app.py"]
README.md CHANGED
@@ -1,11 +1,103 @@
  ---
- title: Anything Llm
- emoji: 💻
- colorFrom: gray
- colorTo: yellow
+ title: AnythingLLM OpenAI API Proxy
+ emoji: 🔄
+ colorFrom: blue
+ colorTo: green
  sdk: docker
  pinned: false
  license: mit
+ app_port: 7860
  ---

- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # AnythingLLM OpenAI API Proxy
+
+ A forwarding service that converts the AnythingLLM API to the OpenAI API format, deployed on Hugging Face Spaces.
+
+ ## Features
+
+ - ✅ Fully compatible with the OpenAI API format
+ - ✅ Supports streaming and non-streaming responses
+ - ✅ Automatically creates new conversation threads
+ - ✅ Smart token rotation and error handling
+ - ✅ Client API key authentication
+ - ✅ Debug mode support
+ - ✅ Optimized for Hugging Face Spaces deployment
+
+ ## API Endpoints
+
+ ### Chat completions
+ ```
+ POST /v1/chat/completions
+ ```
+
+ ### Model list
+ ```
+ GET /v1/models
+ ```
+
+ ### Health check
+ ```
+ GET /health
+ ```
+
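+ For example, you can verify a deployed Space by polling `/health` (a minimal
+ sketch; the Space URL is a placeholder, and the `"healthy"` status value
+ matches what `app.py` returns):
+
+ ```python
+ import requests
+
+ # Hypothetical Space URL -- replace with your own deployment
+ BASE = "https://your-space-name-your-username.hf.space"
+
+ resp = requests.get(f"{BASE}/health", timeout=10)
+ resp.raise_for_status()
+ print(resp.json()["status"])  # prints "healthy" when the proxy is up
+ ```
+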
+ ## Usage Examples
+
+ ### Python (OpenAI library)
+
+ ```python
+ from openai import OpenAI
+
+ client = OpenAI(
+     api_key="your-api-key",
+     base_url="https://your-space-name-your-username.hf.space/v1"
+ )
+
+ response = client.chat.completions.create(
+     model="anythingllm",
+     messages=[
+         {"role": "user", "content": "Hello, please introduce yourself"}
+     ],
+     stream=True
+ )
+
+ for chunk in response:
+     if chunk.choices[0].delta.content:
+         print(chunk.choices[0].delta.content, end="")
+ ```
+
+ ### cURL
+
+ ```bash
+ curl -X POST "https://your-space-name-your-username.hf.space/v1/chat/completions" \
+   -H "Content-Type: application/json" \
+   -H "Authorization: Bearer your-api-key" \
+   -d '{
+     "model": "anythingllm",
+     "messages": [
+       {"role": "user", "content": "Hello, please introduce yourself"}
+     ],
+     "stream": false
+   }'
+ ```
+
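+ A non-streaming call returns a standard OpenAI-style completion object. The
+ shape below follows this proxy's `ChatCompletionResponse` model (values are
+ illustrative; note that the proxy currently reports zeroed `usage` counts):
+
+ ```json
+ {
+   "id": "chatcmpl-<hex>",
+   "object": "chat.completion",
+   "created": 1700000000,
+   "model": "anythingllm",
+   "choices": [
+     {
+       "message": {"role": "assistant", "content": "..."},
+       "index": 0,
+       "finish_reason": "stop"
+     }
+   ],
+   "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0}
+ }
+ ```
+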
+ ## Environment Variable Configuration
+
+ Configure the following environment variables in the Hugging Face Spaces settings:
+
+ - `ANYTHINGLLM_BASE_URL`: base URL of the AnythingLLM instance
+ - `ANYTHINGLLM_WORKSPACE`: workspace name
+ - `BEARER_TOKEN`: Bearer token for AnythingLLM
+ - `CLIENT_API_KEYS`: client API keys (comma-separated)
+ - `DEBUG`: whether to enable debug mode (true/false)
+ - `REQUEST_TIMEOUT`: request timeout in seconds
+
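+ Every variable is optional: `HFConfig` in `app.py` falls back to built-in
+ defaults. A minimal sketch for a local test run (placeholder values; the
+ variables must be set before `app.py` is imported, since `HFConfig` reads
+ them at import time):
+
+ ```python
+ import os
+
+ # Placeholder values -- substitute your own deployment details
+ os.environ["ANYTHINGLLM_BASE_URL"] = "https://your-anythingllm-host:3001"
+ os.environ["ANYTHINGLLM_WORKSPACE"] = "your-workspace"
+ os.environ["BEARER_TOKEN"] = "your-anythingllm-bearer-token"
+ os.environ["CLIENT_API_KEYS"] = "sk-key-one,sk-key-two"
+ os.environ["DEBUG"] = "true"
+ os.environ["REQUEST_TIMEOUT"] = "60"
+
+ import app  # HFConfig now picks up the values above
+ ```
+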
+ ## Supported Models
+
+ - `claude-3-7-sonnet`
+ - `claude-sonnet-4`
+ - `deepseek-chat`
+ - `anythingllm`
+
+ ## License
+
+ MIT License
app.py ADDED
@@ -0,0 +1,626 @@
+ #!/usr/bin/env python3
+ """
+ AnythingLLM to OpenAI API Proxy - Hugging Face Spaces edition
+ Based on the full forwarding service, adapted for Hugging Face Spaces deployment
+ """
+
+ import json
+ import os
+ import time
+ import uuid
+ import logging
+ import threading
+ from typing import Any, Dict, List, Optional, TypedDict, Union
+
+ import requests
+ from fastapi import FastAPI, HTTPException, Depends, Query, Request
+ from fastapi.responses import StreamingResponse, JSONResponse
+ from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
+ from fastapi.middleware.cors import CORSMiddleware
+ from pydantic import BaseModel, Field
+ import uvicorn
+
+
+ # Configure logging
+ logging.basicConfig(level=logging.INFO)
+ logger = logging.getLogger(__name__)
+
+
+ # AnythingLLM token management
+ class AnythingLLMToken(TypedDict):
+     token: str
+     base_url: str
+     workspace: str
+     is_valid: bool
+     last_used: float
+     error_count: int
+
+
+ # Hugging Face Spaces configuration
+ class HFConfig:
+     # Read configuration from environment variables
+     ANYTHINGLLM_BASE_URL = os.getenv("ANYTHINGLLM_BASE_URL", "https://ai.renpho.nl:3002")
+     ANYTHINGLLM_WORKSPACE = os.getenv("ANYTHINGLLM_WORKSPACE", "liufuwei")
+     BEARER_TOKEN = os.getenv("BEARER_TOKEN", "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpZCI6ODYsInVzZXJuYW1lIjoibGl1ZnV3ZWkiLCJpYXQiOjE3NTA3NTAwNjksImV4cCI6MjA2NjM2OTI2OX0.TVLxzR_Uleed1SFgZoa9D3i5pkxYHO24e_Q_vYo2iZA")
+     CLIENT_API_KEYS = os.getenv("CLIENT_API_KEYS", "sk-anythingllm-hf-spaces-default").split(",")
+     DEBUG = os.getenv("DEBUG", "false").lower() == "true"
+     REQUEST_TIMEOUT = int(os.getenv("REQUEST_TIMEOUT", "60"))
+
+
+ # Global state
+ VALID_CLIENT_KEYS: set = set(HFConfig.CLIENT_API_KEYS)
+ ANYTHINGLLM_TOKENS: List[AnythingLLMToken] = []
+ ANYTHINGLLM_MODELS: List[str] = ["claude-3-7-sonnet", "claude-sonnet-4", "deepseek-chat", "anythingllm"]
+ token_rotation_lock = threading.Lock()
+ MAX_ERROR_COUNT = 3
+ ERROR_COOLDOWN = 300
+ DEBUG_MODE = HFConfig.DEBUG
+
+
+ # Pydantic models
+ class ChatMessage(BaseModel):
+     role: str
+     content: Union[str, List[Dict[str, Any]]]
+     reasoning_content: Optional[str] = None
+
+
+ class ChatCompletionRequest(BaseModel):
+     model: str
+     messages: List[ChatMessage]
+     stream: bool = True
+     temperature: Optional[float] = None
+     max_tokens: Optional[int] = None
+     top_p: Optional[float] = None
+
+
+ class ModelInfo(BaseModel):
+     id: str
+     object: str = "model"
+     created: int
+     owned_by: str
+
+
+ class ModelList(BaseModel):
+     object: str = "list"
+     data: List[ModelInfo]
+
+
+ class ChatCompletionChoice(BaseModel):
+     message: ChatMessage
+     index: int = 0
+     finish_reason: str = "stop"
+
+
+ class ChatCompletionResponse(BaseModel):
+     id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4().hex}")
+     object: str = "chat.completion"
+     created: int = Field(default_factory=lambda: int(time.time()))
+     model: str
+     choices: List[ChatCompletionChoice]
+     usage: Dict[str, int] = Field(
+         default_factory=lambda: {
+             "prompt_tokens": 0,
+             "completion_tokens": 0,
+             "total_tokens": 0,
+         }
+     )
+
+
+ class StreamChoice(BaseModel):
+     delta: Dict[str, Any] = Field(default_factory=dict)
+     index: int = 0
+     finish_reason: Optional[str] = None
+
+
+ class StreamResponse(BaseModel):
+     id: str = Field(default_factory=lambda: f"chatcmpl-{uuid.uuid4().hex}")
+     object: str = "chat.completion.chunk"
+     created: int = Field(default_factory=lambda: int(time.time()))
+     model: str
+     choices: List[StreamChoice]
+
+
+ # FastAPI app
+ app = FastAPI(
+     title="AnythingLLM OpenAI API Adapter - HF Spaces",
+     description="Converts AnythingLLM API to OpenAI-compatible format for Hugging Face Spaces",
+     version="1.0.0-hf"
+ )
+
+ # Add CORS middleware
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["GET", "POST", "PUT", "DELETE", "OPTIONS"],
+     allow_headers=["*"],
+ )
+
+ security = HTTPBearer(auto_error=False)
+
+
+ def log_debug(message: str):
+     """Debug logging helper"""
+     if DEBUG_MODE:
+         logger.info(f"[DEBUG] {message}")
+
+
+ def initialize_anythingllm_tokens():
+     """Initialize the AnythingLLM tokens"""
+     global ANYTHINGLLM_TOKENS
+     ANYTHINGLLM_TOKENS = [{
+         "token": HFConfig.BEARER_TOKEN,
+         "base_url": HFConfig.ANYTHINGLLM_BASE_URL,
+         "workspace": HFConfig.ANYTHINGLLM_WORKSPACE,
+         "is_valid": True,
+         "last_used": 0,
+         "error_count": 0
+     }]
+     logger.info(f"Initialized AnythingLLM tokens: {len(ANYTHINGLLM_TOKENS)}")
+
+
+ def get_best_anythingllm_token() -> Optional[AnythingLLMToken]:
+     """Get the best available AnythingLLM token"""
+     with token_rotation_lock:
+         now = time.time()
+         valid_tokens = [
+             token for token in ANYTHINGLLM_TOKENS
+             if token["is_valid"] and (
+                 token["error_count"] < MAX_ERROR_COUNT or
+                 now - token["last_used"] > ERROR_COOLDOWN
+             )
+         ]
+
+         if not valid_tokens:
+             return None
+
+         # Reset error count for tokens that have been in cooldown
+         for token in valid_tokens:
+             if token["error_count"] >= MAX_ERROR_COUNT and now - token["last_used"] > ERROR_COOLDOWN:
+                 token["error_count"] = 0
+
+         # Sort by last used (oldest first) and error count (lowest first)
+         valid_tokens.sort(key=lambda x: (x["last_used"], x["error_count"]))
+         token = valid_tokens[0]
+         token["last_used"] = now
+         return token
+
+
+ def _convert_messages_to_anythingllm_format(messages: List[ChatMessage]) -> str:
+     """Convert the OpenAI messages format to an AnythingLLM message.
+
+     Note: only the most recent user message is forwarded; earlier
+     conversation history is not sent to AnythingLLM.
+     """
+     if not messages:
+         return ""
+
+     # Get the last user message as the current message
+     last_user_msg = None
+     for msg in reversed(messages):
+         if msg.role == "user":
+             last_user_msg = msg
+             break
+
+     if not last_user_msg:
+         raise HTTPException(status_code=400, detail="No user message found in the conversation.")
+
+     message = last_user_msg.content if isinstance(last_user_msg.content, str) else ""
+     return message
+
+
+ async def authenticate_client(
+     auth: Optional[HTTPAuthorizationCredentials] = Depends(security),
+ ):
+     """Authenticate client based on API key in Authorization header"""
+     if not VALID_CLIENT_KEYS:
+         # On HF Spaces, skip authentication when no client keys are configured
+         return
+
+     if not auth or not auth.credentials:
+         raise HTTPException(
+             status_code=401,
+             detail="API key required in Authorization header.",
+             headers={"WWW-Authenticate": "Bearer"},
+         )
+
+     if auth.credentials not in VALID_CLIENT_KEYS:
+         raise HTTPException(status_code=403, detail="Invalid client API key.")
+
+
+ def decode_text_content(text):
+     """Repair mojibake in text content.
+
+     Upstream responses occasionally arrive as UTF-8 bytes mis-decoded as
+     Latin-1; round-tripping through latin-1 restores the original UTF-8
+     (e.g. a garbled sequence like "ä½ å¥½" decodes back to "你好").
+     Text that fails the round trip is returned unchanged.
+     """
+     if not text:
+         return text
+     try:
+         if isinstance(text, str):
+             try:
+                 decoded = text.encode('latin-1').decode('utf-8')
+                 return decoded
+             except (UnicodeEncodeError, UnicodeDecodeError):
+                 return text
+         return text
+     except Exception:
+         return text
+
+
+ @app.on_event("startup")
+ async def startup():
+     """Initialize configuration at application startup"""
+     logger.info("Starting AnythingLLM OpenAI API Adapter for Hugging Face Spaces...")
+     initialize_anythingllm_tokens()
+     logger.info("Server initialization completed.")
+
+
+ def get_models_list_response() -> ModelList:
+     """Helper to construct ModelList response from cached models"""
+     model_infos = [
+         ModelInfo(
+             id=model,
+             created=int(time.time()),
+             owned_by="anythingllm"
+         )
+         for model in ANYTHINGLLM_MODELS
+     ]
+     return ModelList(data=model_infos)
+
+
+ def create_new_thread(token: AnythingLLMToken) -> Optional[str]:
+     """Create a new thread and return thread ID"""
+     try:
+         url = f"{token['base_url']}/api/workspace/{token['workspace']}/thread/new"
+         headers = {
+             "Authorization": f"Bearer {token['token']}",
+             "Accept": "*/*",
+             "Content-Type": "application/json",
+         }
+
+         response = requests.post(url, headers=headers, timeout=30)
+         response.raise_for_status()
+
+         data = response.json()
+         thread_slug = data.get("thread", {}).get("slug")
+         log_debug(f"Created new thread: {thread_slug}")
+         return thread_slug
+
+     except Exception as e:
+         log_debug(f"Failed to create new thread: {e}")
+         return None
+
+
+ def _anythingllm_stream_generator(response, model: str):
+     """Real-time streaming with format conversion - AnythingLLM to OpenAI"""
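+     # The upstream SSE events parsed below look roughly like this (shape
+     # inferred from the handling logic; field values are illustrative):
+     #   data: {"type": "textResponseChunk", "textResponse": "text so far", "close": false}
+     #   data: {"type": "finalizeResponseStream", "close": true}
+     # The delta computation treats "textResponse" as cumulative text and diffs
+     # it against full_content to emit OpenAI-style incremental deltas.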
+     stream_id = f"chatcmpl-{uuid.uuid4().hex}"
+     created_time = int(time.time())
+
+     # Send the initial role delta
+     yield f"data: {StreamResponse(id=stream_id, created=created_time, model=model, choices=[StreamChoice(delta={'role': 'assistant'})]).model_dump_json()}\n\n"
+
+     buffer = ""
+     full_content = ""
+
+     try:
+         for chunk in response.iter_content(chunk_size=1024):
+             if not chunk:
+                 continue
+
+             chunk_text = chunk.decode("utf-8")
+             log_debug(f"Received chunk: {chunk_text[:100]}..." if len(chunk_text) > 100 else chunk_text)
+             buffer += chunk_text
+
+             # Process complete event blocks in the buffer
+             while "\n\n" in buffer:
+                 event_data, buffer = buffer.split("\n\n", 1)
+                 event_data = event_data.strip()
+
+                 if not event_data or not event_data.startswith("data: "):
+                     continue
+
+                 try:
+                     # Parse the AnythingLLM SSE payload
+                     json_data = json.loads(event_data[6:])  # Remove "data: " prefix
+                     log_debug(f"Parsed JSON: {json_data}")
+
+                     if json_data.get("type") == "textResponseChunk":
+                         text_response = json_data.get("textResponse", "")
+                         if text_response:
+                             # Decode the text content
+                             decoded_text = decode_text_content(text_response)
+
+                             # Compute the incremental delta
+                             if decoded_text.startswith(full_content):
+                                 delta = decoded_text[len(full_content):]
+                                 full_content = decoded_text
+
+                                 if delta:
+                                     openai_response = StreamResponse(
+                                         id=stream_id,
+                                         created=created_time,
+                                         model=model,
+                                         choices=[StreamChoice(delta={"content": delta})],
+                                     )
+                                     yield f"data: {openai_response.model_dump_json()}\n\n"
+
+                     # Check whether the stream has ended
+                     if json_data.get("close", False):
+                         log_debug("Received close signal.")
+                         openai_response = StreamResponse(
+                             id=stream_id,
+                             created=created_time,
+                             model=model,
+                             choices=[StreamChoice(delta={}, finish_reason="stop")],
+                         )
+                         yield f"data: {openai_response.model_dump_json()}\n\n"
+                         yield "data: [DONE]\n\n"
+                         return
+
+                     elif json_data.get("type") == "finalizeResponseStream":
+                         log_debug("Received finalize signal.")
+                         openai_response = StreamResponse(
+                             id=stream_id,
+                             created=created_time,
+                             model=model,
+                             choices=[StreamChoice(delta={}, finish_reason="stop")],
+                         )
+                         yield f"data: {openai_response.model_dump_json()}\n\n"
+                         yield "data: [DONE]\n\n"
+                         return
+
+                 except json.JSONDecodeError as e:
+                     log_debug(f"Failed to parse JSON: {e}")
+                     continue
+
+     except Exception as e:
+         log_debug(f"Stream processing error: {e}")
+         yield f"data: {json.dumps({'error': str(e)})}\n\n"
+
+     # If the stream was interrupted unexpectedly, still send a termination signal
+     log_debug("Stream finished unexpectedly, sending completion signal.")
+     yield f"data: {StreamResponse(id=stream_id, created=created_time, model=model, choices=[StreamChoice(delta={}, finish_reason='stop')]).model_dump_json()}\n\n"
+     yield "data: [DONE]\n\n"
+
+
+ def _build_anythingllm_non_stream_response(response, model: str) -> ChatCompletionResponse:
+     """Build non-streaming response by accumulating stream data"""
+     full_content = ""
+     buffer = ""
+
+     for chunk in response.iter_content(chunk_size=1024):
+         if not chunk:
+             continue
+
+         buffer += chunk.decode("utf-8")
+
+         # Process complete event blocks in the buffer
+         while "\n\n" in buffer:
+             event_data, buffer = buffer.split("\n\n", 1)
+             event_data = event_data.strip()
+
+             if not event_data or not event_data.startswith("data: "):
+                 continue
+
+             try:
+                 json_data = json.loads(event_data[6:])
+
+                 if json_data.get("type") == "textResponseChunk":
+                     text_response = json_data.get("textResponse", "")
+                     if text_response:
+                         full_content = decode_text_content(text_response)
+
+                 if json_data.get("close", False):
+                     return ChatCompletionResponse(
+                         model=model,
+                         choices=[
+                             ChatCompletionChoice(
+                                 message=ChatMessage(
+                                     role="assistant",
+                                     content=full_content
+                                 )
+                             )
+                         ],
+                     )
+
+                 elif json_data.get("type") == "finalizeResponseStream":
+                     return ChatCompletionResponse(
+                         model=model,
+                         choices=[
+                             ChatCompletionChoice(
+                                 message=ChatMessage(
+                                     role="assistant",
+                                     content=full_content
+                                 )
+                             )
+                         ],
+                     )
+
+             except json.JSONDecodeError:
+                 continue
+
+     # If the loop finishes without an explicit close/finalize event
+     return ChatCompletionResponse(
+         model=model,
+         choices=[
+             ChatCompletionChoice(
+                 message=ChatMessage(
+                     role="assistant",
+                     content=full_content
+                 )
+             )
+         ],
+     )
+
+
+ @app.get("/v1/models", response_model=ModelList)
+ async def list_v1_models(_: None = Depends(authenticate_client)):
+     """List available models - authenticated"""
+     return get_models_list_response()
+
+
+ @app.get("/models", response_model=ModelList)
+ async def list_models_no_auth():
+     """List available models without authentication - for client compatibility"""
+     return get_models_list_response()
+
+
+ @app.get("/debug")
+ async def toggle_debug(enable: bool = Query(None)):
+     """Toggle debug mode"""
+     global DEBUG_MODE
+     if enable is not None:
+         DEBUG_MODE = enable
+     return {"debug_mode": DEBUG_MODE}
+
+
+ @app.post("/v1/chat/completions")
+ async def chat_completions(
+     request: ChatCompletionRequest, _: None = Depends(authenticate_client)
+ ):
+     """Create chat completion using AnythingLLM backend"""
+     if request.model not in ANYTHINGLLM_MODELS:
+         raise HTTPException(status_code=404, detail=f"Model '{request.model}' not found.")
+
+     if not request.messages:
+         raise HTTPException(status_code=400, detail="No messages provided in the request.")
+
+     log_debug(f"Processing request for model: {request.model}")
+
+     # Convert the message format
+     try:
+         message = _convert_messages_to_anythingllm_format(request.messages)
+     except Exception as e:
+         raise HTTPException(status_code=400, detail=f"Failed to process messages: {str(e)}")
+
+     # Try every token; the extra iteration raises 503 once all tokens have failed
+     for attempt in range(len(ANYTHINGLLM_TOKENS) + 1):
+         if attempt == len(ANYTHINGLLM_TOKENS):
+             raise HTTPException(
+                 status_code=503,
+                 detail="All attempts to contact AnythingLLM API failed."
+             )
+
+         token = get_best_anythingllm_token()
+         if not token:
+             raise HTTPException(
+                 status_code=503,
+                 detail="No valid AnythingLLM tokens available."
+             )
+
+         try:
+             # Create a new thread
+             thread_id = create_new_thread(token)
+             if not thread_id:
+                 raise Exception("Failed to create new thread")
+
+             # Build the chat request
+             url = f"{token['base_url']}/api/workspace/{token['workspace']}/thread/{thread_id}/stream-chat"
+
+             payload = {
+                 "message": message,
+                 "attachments": []
+             }
+
+             headers = {
+                 "Accept": "text/event-stream",
+                 "Authorization": f"Bearer {token['token']}",
+                 "Content-Type": "text/plain;charset=UTF-8",
+             }
+
+             log_debug(f"Sending request to AnythingLLM API: {url}")
+
+             response = requests.post(
+                 url,
+                 data=json.dumps(payload),
+                 headers=headers,
+                 stream=True,
+                 timeout=HFConfig.REQUEST_TIMEOUT,
+             )
+             response.raise_for_status()
+
+             if request.stream:
+                 log_debug("Returning stream response")
+                 return StreamingResponse(
+                     _anythingllm_stream_generator(response, request.model),
+                     media_type="text/event-stream",
+                     headers={
+                         "Cache-Control": "no-cache",
+                         "Connection": "keep-alive",
+                         "X-Accel-Buffering": "no",
+                     },
+                 )
+             else:
+                 log_debug("Building non-stream response")
+                 return _build_anythingllm_non_stream_response(response, request.model)
+
+         except requests.HTTPError as e:
+             status_code = getattr(e.response, "status_code", 500)
+             error_detail = getattr(e.response, "text", str(e))
+             log_debug(f"AnythingLLM API error ({status_code}): {error_detail}")
+
+             with token_rotation_lock:
+                 if status_code in [401, 403]:
+                     # Mark the token as invalid
+                     token["is_valid"] = False
+                     logger.warning(f"Token ...{token['token'][-4:]} marked as invalid due to auth error.")
+                 elif status_code in [429, 500, 502, 503, 504]:
+                     # Increment the error count
+                     token["error_count"] += 1
+                     logger.warning(f"Token ...{token['token'][-4:]} error count: {token['error_count']}")
+
+         except Exception as e:
+             log_debug(f"Request error: {e}")
+             with token_rotation_lock:
+                 token["error_count"] += 1
+
+
+ @app.get("/health")
+ async def health_check():
+     """Health check"""
+     return JSONResponse(content={
+         "status": "healthy",
+         "service": "anythingllm-to-openai-proxy-hf",
+         "version": "1.0.0-hf",
+         "timestamp": int(time.time()),
+         "config": {
+             "base_url": HFConfig.ANYTHINGLLM_BASE_URL,
+             "workspace": HFConfig.ANYTHINGLLM_WORKSPACE,
+             "debug": DEBUG_MODE,
+             "models": ANYTHINGLLM_MODELS
+         }
+     })
+
+
+ @app.get("/")
+ async def root():
+     """API root path"""
+     return JSONResponse(content={
+         "service": "AnythingLLM to OpenAI API Proxy",
+         "version": "1.0.0-hf",
+         "platform": "Hugging Face Spaces",
+         "endpoints": {
+             "chat_completions": "/v1/chat/completions",
+             "models": "/v1/models",
+             "health": "/health",
+             "debug": "/debug"
+         },
+         "description": "Converts AnythingLLM API to OpenAI-compatible format",
+         "usage": {
+             "example": {
+                 "url": "/v1/chat/completions",
+                 "method": "POST",
+                 "headers": {
+                     "Authorization": "Bearer your-api-key",
+                     "Content-Type": "application/json"
+                 },
+                 "body": {
+                     "model": "anythingllm",
+                     "messages": [{"role": "user", "content": "Hello"}],
+                     "stream": False
+                 }
+             }
+         }
+     })
+
+
+ if __name__ == "__main__":
+     uvicorn.run(
+         app,
+         host="0.0.0.0",
+         port=7860,  # Default port on Hugging Face Spaces
+         log_level="info"
+     )
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ fastapi==0.104.1
+ uvicorn[standard]==0.24.0
+ requests==2.31.0
+ pydantic==2.5.0
+ python-multipart==0.0.6