add support for PerfXCloud (#1883)
### What problem does this PR solve?
#1853 add support for PerfXCloud
### Type of change
- [x] New Feature (non-breaking change which adds functionality)
Co-authored-by: Zhedong Cen <cenzhedong2@126.com>
conf/llm_factories.json
CHANGED
|
@@ -2442,6 +2442,158 @@
|
|
| 2442 |
"model_type": "chat"
|
| 2443 |
}
|
| 2444 |
]
|
| 2445 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2446 |
]
|
| 2447 |
}
|
|
|
|
| 2442 |
"model_type": "chat"
|
| 2443 |
}
|
| 2444 |
]
|
| 2445 |
+
},
|
| 2446 |
+
{
|
| 2447 |
+
"name": "PerfXCloud",
|
| 2448 |
+
"logo": "",
|
| 2449 |
+
"tags": "LLM,TEXT EMBEDDING",
|
| 2450 |
+
"status": "1",
|
| 2451 |
+
"llm": [
|
| 2452 |
+
{
|
| 2453 |
+
"llm_name": "deepseek-v2-chat",
|
| 2454 |
+
"tags": "LLM,CHAT,4k",
|
| 2455 |
+
"max_tokens": 4096,
|
| 2456 |
+
"model_type": "chat"
|
| 2457 |
+
},
|
| 2458 |
+
{
|
| 2459 |
+
"llm_name": "llama3.1:405b",
|
| 2460 |
+
"tags": "LLM,CHAT,128k",
|
| 2461 |
+
"max_tokens": 131072,
|
| 2462 |
+
"model_type": "chat"
|
| 2463 |
+
},
|
| 2464 |
+
{
|
| 2465 |
+
"llm_name": "Qwen2-72B-Instruct",
|
| 2466 |
+
"tags": "LLM,CHAT,128k",
|
| 2467 |
+
"max_tokens": 131072,
|
| 2468 |
+
"model_type": "chat"
|
| 2469 |
+
},
|
| 2470 |
+
{
|
| 2471 |
+
"llm_name": "Qwen2-72B-Instruct-GPTQ-Int4",
|
| 2472 |
+
"tags": "LLM,CHAT,2k",
|
| 2473 |
+
"max_tokens": 2048,
|
| 2474 |
+
"model_type": "chat"
|
| 2475 |
+
},
|
| 2476 |
+
{
|
| 2477 |
+
"llm_name": "Qwen2-72B-Instruct-awq-int4",
|
| 2478 |
+
"tags": "LLM,CHAT,32k",
|
| 2479 |
+
"max_tokens": 32768,
|
| 2480 |
+
"model_type": "chat"
|
| 2481 |
+
},
|
| 2482 |
+
{
|
| 2483 |
+
"llm_name": "Llama3-Chinese_v2",
|
| 2484 |
+
"tags": "LLM,CHAT,8k",
|
| 2485 |
+
"max_tokens": 8192,
|
| 2486 |
+
"model_type": "chat"
|
| 2487 |
+
},
|
| 2488 |
+
{
|
| 2489 |
+
"llm_name": "Yi-1_5-9B-Chat-16K",
|
| 2490 |
+
"tags": "LLM,CHAT,16k",
|
| 2491 |
+
"max_tokens": 16384,
|
| 2492 |
+
"model_type": "chat"
|
| 2493 |
+
},
|
| 2494 |
+
{
|
| 2495 |
+
"llm_name": "Qwen1.5-72B-Chat-GPTQ-Int4",
|
| 2496 |
+
"tags": "LLM,CHAT,2k",
|
| 2497 |
+
"max_tokens": 2048,
|
| 2498 |
+
"model_type": "chat"
|
| 2499 |
+
},
|
| 2500 |
+
{
|
| 2501 |
+
"llm_name": "Meta-Llama-3.1-8B-Instruct",
|
| 2502 |
+
"tags": "LLM,CHAT,4k",
|
| 2503 |
+
"max_tokens": 4096,
|
| 2504 |
+
"model_type": "chat"
|
| 2505 |
+
},
|
| 2506 |
+
{
|
| 2507 |
+
"llm_name": "Qwen2-7B-Instruct",
|
| 2508 |
+
"tags": "LLM,CHAT,32k",
|
| 2509 |
+
"max_tokens": 32768,
|
| 2510 |
+
"model_type": "chat"
|
| 2511 |
+
},
|
| 2512 |
+
{
|
| 2513 |
+
"llm_name": "deepseek-v2-lite-chat",
|
| 2514 |
+
"tags": "LLM,CHAT,2k",
|
| 2515 |
+
"max_tokens": 2048,
|
| 2516 |
+
"model_type": "chat"
|
| 2517 |
+
},
|
| 2518 |
+
{
|
| 2519 |
+
"llm_name": "Qwen2-7B",
|
| 2520 |
+
"tags": "LLM,CHAT,128k",
|
| 2521 |
+
"max_tokens": 131072,
|
| 2522 |
+
"model_type": "chat"
|
| 2523 |
+
},
|
| 2524 |
+
{
|
| 2525 |
+
"llm_name": "chatglm3-6b",
|
| 2526 |
+
"tags": "LLM,CHAT,8k",
|
| 2527 |
+
"max_tokens": 8192,
|
| 2528 |
+
"model_type": "chat"
|
| 2529 |
+
},
|
| 2530 |
+
{
|
| 2531 |
+
"llm_name": "Meta-Llama-3-70B-Instruct-GPTQ-Int4",
|
| 2532 |
+
"tags": "LLM,CHAT,1k",
|
| 2533 |
+
"max_tokens": 1024,
|
| 2534 |
+
"model_type": "chat"
|
| 2535 |
+
},
|
| 2536 |
+
{
|
| 2537 |
+
"llm_name": "Meta-Llama-3-8B-Instruct",
|
| 2538 |
+
"tags": "LLM,CHAT,8k",
|
| 2539 |
+
"max_tokens": 8192,
|
| 2540 |
+
"model_type": "chat"
|
| 2541 |
+
},
|
| 2542 |
+
{
|
| 2543 |
+
"llm_name": "Mistral-7B-Instruct",
|
| 2544 |
+
"tags": "LLM,CHAT,32k",
|
| 2545 |
+
"max_tokens": 32768,
|
| 2546 |
+
"model_type": "chat"
|
| 2547 |
+
},
|
| 2548 |
+
{
|
| 2549 |
+
"llm_name": "MindChat-Qwen-7B-v2",
|
| 2550 |
+
"tags": "LLM,CHAT,2k",
|
| 2551 |
+
"max_tokens": 2048,
|
| 2552 |
+
"model_type": "chat"
|
| 2553 |
+
},
|
| 2554 |
+
{
|
| 2555 |
+
"llm_name": "phi-2",
|
| 2556 |
+
"tags": "LLM,CHAT,2k",
|
| 2557 |
+
"max_tokens": 2048,
|
| 2558 |
+
"model_type": "chat"
|
| 2559 |
+
},
|
| 2560 |
+
{
|
| 2561 |
+
"llm_name": "SOLAR-10_7B-Instruct",
|
| 2562 |
+
"tags": "LLM,CHAT,4k",
|
| 2563 |
+
"max_tokens": 4096,
|
| 2564 |
+
"model_type": "chat"
|
| 2565 |
+
},
|
| 2566 |
+
{
|
| 2567 |
+
"llm_name": "Mixtral-8x7B-Instruct-v0.1-GPTQ",
|
| 2568 |
+
"tags": "LLM,CHAT,32k",
|
| 2569 |
+
"max_tokens": 32768,
|
| 2570 |
+
"model_type": "chat"
|
| 2571 |
+
},
|
| 2572 |
+
{
|
| 2573 |
+
"llm_name": "Qwen1.5-7B",
|
| 2574 |
+
"tags": "LLM,CHAT,32k",
|
| 2575 |
+
"max_tokens": 32768,
|
| 2576 |
+
"model_type": "chat"
|
| 2577 |
+
},
|
| 2578 |
+
{
|
| 2579 |
+
"llm_name": "BAAI/bge-large-en-v1.5",
|
| 2580 |
+
"tags": "TEXT EMBEDDING",
|
| 2581 |
+
"max_tokens": 512,
|
| 2582 |
+
"model_type": "embedding"
|
| 2583 |
+
},
|
| 2584 |
+
{
|
| 2585 |
+
"llm_name": "BAAI/bge-large-zh-v1.5",
|
| 2586 |
+
"tags": "TEXT EMBEDDING",
|
| 2587 |
+
"max_tokens": 1024,
|
| 2588 |
+
"model_type": "embedding"
|
| 2589 |
+
},
|
| 2590 |
+
{
|
| 2591 |
+
"llm_name": "BAAI/bge-m3",
|
| 2592 |
+
"tags": "TEXT EMBEDDING",
|
| 2593 |
+
"max_tokens": 8192,
|
| 2594 |
+
"model_type": "embedding"
|
| 2595 |
+
}
|
| 2596 |
+
]
|
| 2597 |
+
}
|
| 2598 |
]
|
| 2599 |
}
|
rag/llm/__init__.py
CHANGED
|
@@ -38,7 +38,8 @@ EmbeddingModel = {
|
|
| 38 |
"NVIDIA": NvidiaEmbed,
|
| 39 |
"LM-Studio": LmStudioEmbed,
|
| 40 |
"OpenAI-API-Compatible": OpenAI_APIEmbed,
|
| 41 |
-
"cohere": CoHereEmbed
|
|
|
|
| 42 |
}
|
| 43 |
|
| 44 |
|
|
@@ -84,7 +85,8 @@ ChatModel = {
|
|
| 84 |
"LM-Studio": LmStudioChat,
|
| 85 |
"OpenAI-API-Compatible": OpenAI_APIChat,
|
| 86 |
"cohere": CoHereChat,
|
| 87 |
-
"LeptonAI": LeptonAIChat
|
|
|
|
| 88 |
}
|
| 89 |
|
| 90 |
|
|
|
|
| 38 |
"NVIDIA": NvidiaEmbed,
|
| 39 |
"LM-Studio": LmStudioEmbed,
|
| 40 |
"OpenAI-API-Compatible": OpenAI_APIEmbed,
|
| 41 |
+
"cohere": CoHereEmbed,
|
| 42 |
+
"PerfXCloud": PerfXCloudEmbed,
|
| 43 |
}
|
| 44 |
|
| 45 |
|
|
|
|
| 85 |
"LM-Studio": LmStudioChat,
|
| 86 |
"OpenAI-API-Compatible": OpenAI_APIChat,
|
| 87 |
"cohere": CoHereChat,
|
| 88 |
+
"LeptonAI": LeptonAIChat,
|
| 89 |
+
"PerfXCloud": PerfXCloudChat
|
| 90 |
}
|
| 91 |
|
| 92 |
|
rag/llm/chat_model.py
CHANGED
|
@@ -987,4 +987,11 @@ class LeptonAIChat(Base):
|
|
| 987 |
def __init__(self, key, model_name, base_url=None):
|
| 988 |
if not base_url:
|
| 989 |
base_url = os.path.join("https://"+model_name+".lepton.run","api","v1")
|
| 990 |
-
super().__init__(key, model_name, base_url)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 987 |
def __init__(self, key, model_name, base_url=None):
|
| 988 |
if not base_url:
|
| 989 |
base_url = os.path.join("https://"+model_name+".lepton.run","api","v1")
|
| 990 |
+
super().__init__(key, model_name, base_url)
|
| 991 |
+
|
| 992 |
+
|
| 993 |
+
class PerfXCloudChat(Base):
    """Chat client for PerfXCloud's OpenAI-compatible chat endpoint.

    Thin wrapper around ``Base``: PerfXCloud speaks the OpenAI chat API,
    so only the base URL differs from the generic client.
    """

    # Single source of truth for the public endpoint (previously duplicated
    # in both the default argument and the fallback branch below).
    DEFAULT_BASE_URL = "https://cloud.perfxlab.cn/v1"

    def __init__(self, key, model_name, base_url=DEFAULT_BASE_URL):
        # Guard against callers explicitly passing None/"" for base_url.
        if not base_url:
            base_url = self.DEFAULT_BASE_URL
        super().__init__(key, model_name, base_url)
|
rag/llm/embedding_model.py
CHANGED
|
@@ -553,3 +553,10 @@ class CoHereEmbed(Base):
|
|
| 553 |
return np.array([d for d in res.embeddings.float]), int(
|
| 554 |
res.meta.billed_units.input_tokens
|
| 555 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 553 |
return np.array([d for d in res.embeddings.float]), int(
|
| 554 |
res.meta.billed_units.input_tokens
|
| 555 |
)
|
| 556 |
+
|
| 557 |
+
|
| 558 |
+
class PerfXCloudEmbed(OpenAIEmbed):
    """Embedding client for PerfXCloud's OpenAI-compatible embedding endpoint.

    Reuses ``OpenAIEmbed`` wholesale; only the base URL differs.
    """

    # Single source of truth for the public endpoint (previously duplicated
    # in both the default argument and the fallback branch below).
    DEFAULT_BASE_URL = "https://cloud.perfxlab.cn/v1"

    def __init__(self, key, model_name, base_url=DEFAULT_BASE_URL):
        # Guard against callers explicitly passing None/"" for base_url.
        if not base_url:
            base_url = self.DEFAULT_BASE_URL
        super().__init__(key, model_name, base_url)
|
web/src/assets/svg/llm/perfx-cloud.svg
ADDED
|
|
web/src/pages/user-setting/setting-model/constant.ts
CHANGED
|
@@ -25,6 +25,7 @@ export const IconMap = {
|
|
| 25 |
'OpenAI-API-Compatible': 'openai-api',
|
| 26 |
cohere: 'cohere',
|
| 27 |
Lepton: 'lepton',
|
|
|
|
| 28 |
};
|
| 29 |
|
| 30 |
export const BedrockRegionList = [
|
|
|
|
| 25 |
'OpenAI-API-Compatible': 'openai-api',
|
| 26 |
cohere: 'cohere',
|
| 27 |
Lepton: 'lepton',
|
| 28 |
+
PerfXCloud: 'perfx-cloud'
|
| 29 |
};
|
| 30 |
|
| 31 |
export const BedrockRegionList = [
|