Spaces:
Sleeping
Sleeping
Commit ·
c21f04b
1
Parent(s): f3b0f10
Upload models.py
Browse files- modules/models.py +16 -17
modules/models.py
CHANGED
|
@@ -84,9 +84,9 @@ class OpenAIClient(BaseLLMModel):
|
|
| 84 |
usage_data = self._get_billing_data(usage_url)
|
| 85 |
except Exception as e:
|
| 86 |
logging.error(f"获取API使用情况失败:" + str(e))
|
| 87 |
-
return
|
| 88 |
rounded_usage = "{:.5f}".format(usage_data["total_usage"] / 100)
|
| 89 |
-
return
|
| 90 |
except requests.exceptions.ConnectTimeout:
|
| 91 |
status_text = (
|
| 92 |
STANDARD_ERROR_MSG + CONNECTION_TIMEOUT_MSG + ERROR_RETRIEVE_MSG
|
|
@@ -96,7 +96,7 @@ class OpenAIClient(BaseLLMModel):
|
|
| 96 |
status_text = STANDARD_ERROR_MSG + READ_TIMEOUT_MSG + ERROR_RETRIEVE_MSG
|
| 97 |
return status_text
|
| 98 |
except Exception as e:
|
| 99 |
-
logging.error(
|
| 100 |
return STANDARD_ERROR_MSG + ERROR_RETRIEVE_MSG
|
| 101 |
|
| 102 |
def set_token_upper_limit(self, new_upper_limit):
|
|
@@ -105,7 +105,7 @@ class OpenAIClient(BaseLLMModel):
|
|
| 105 |
def set_key(self, new_access_key):
|
| 106 |
self.api_key = new_access_key.strip()
|
| 107 |
self._refresh_header()
|
| 108 |
-
msg =
|
| 109 |
logging.info(msg)
|
| 110 |
return msg
|
| 111 |
|
|
@@ -197,7 +197,7 @@ class OpenAIClient(BaseLLMModel):
|
|
| 197 |
try:
|
| 198 |
chunk = json.loads(chunk[6:])
|
| 199 |
except json.JSONDecodeError:
|
| 200 |
-
print(
|
| 201 |
error_msg += chunk
|
| 202 |
continue
|
| 203 |
if chunk_length > 6 and "delta" in chunk["choices"][0]:
|
|
@@ -235,25 +235,21 @@ class ChatGLM_Client(BaseLLMModel):
|
|
| 235 |
quantified = False
|
| 236 |
if "int4" in model_name:
|
| 237 |
quantified = True
|
| 238 |
-
|
| 239 |
-
model = AutoModel.from_pretrained(
|
| 240 |
model_source, trust_remote_code=True
|
| 241 |
-
)
|
| 242 |
-
else:
|
| 243 |
-
model = AutoModel.from_pretrained(
|
| 244 |
-
model_source, trust_remote_code=True
|
| 245 |
-
).half()
|
| 246 |
if torch.cuda.is_available():
|
| 247 |
# run on CUDA
|
| 248 |
logging.info("CUDA is available, using CUDA")
|
| 249 |
-
model = model.cuda()
|
| 250 |
# mps加速还存在一些问题,暂时不使用
|
| 251 |
elif system_name == "Darwin" and model_path is not None and not quantified:
|
| 252 |
logging.info("Running on macOS, using MPS")
|
| 253 |
# running on macOS and model already downloaded
|
| 254 |
-
model = model.to("mps")
|
| 255 |
else:
|
| 256 |
logging.info("GPU is not available, using CPU")
|
|
|
|
| 257 |
model = model.eval()
|
| 258 |
CHATGLM_MODEL = model
|
| 259 |
|
|
@@ -483,8 +479,11 @@ class XMBot_Client(BaseLLMModel):
|
|
| 483 |
"data": question
|
| 484 |
}
|
| 485 |
response = requests.post(self.url, json=data)
|
| 486 |
-
|
| 487 |
-
|
|
|
|
|
|
|
|
|
|
| 488 |
|
| 489 |
|
| 490 |
|
|
@@ -497,7 +496,7 @@ def get_model(
|
|
| 497 |
top_p=None,
|
| 498 |
system_prompt=None,
|
| 499 |
) -> BaseLLMModel:
|
| 500 |
-
msg =
|
| 501 |
model_type = ModelType.get_type(model_name)
|
| 502 |
lora_selector_visibility = False
|
| 503 |
lora_choices = []
|
|
|
|
| 84 |
usage_data = self._get_billing_data(usage_url)
|
| 85 |
except Exception as e:
|
| 86 |
logging.error(f"获取API使用情况失败:" + str(e))
|
| 87 |
+
return i18n("**获取API使用情况失败**")
|
| 88 |
rounded_usage = "{:.5f}".format(usage_data["total_usage"] / 100)
|
| 89 |
+
return i18n("**本月使用金额** ") + f"\u3000 ${rounded_usage}"
|
| 90 |
except requests.exceptions.ConnectTimeout:
|
| 91 |
status_text = (
|
| 92 |
STANDARD_ERROR_MSG + CONNECTION_TIMEOUT_MSG + ERROR_RETRIEVE_MSG
|
|
|
|
| 96 |
status_text = STANDARD_ERROR_MSG + READ_TIMEOUT_MSG + ERROR_RETRIEVE_MSG
|
| 97 |
return status_text
|
| 98 |
except Exception as e:
|
| 99 |
+
logging.error(i18n("获取API使用情况失败:") + str(e))
|
| 100 |
return STANDARD_ERROR_MSG + ERROR_RETRIEVE_MSG
|
| 101 |
|
| 102 |
def set_token_upper_limit(self, new_upper_limit):
|
|
|
|
| 105 |
def set_key(self, new_access_key):
|
| 106 |
self.api_key = new_access_key.strip()
|
| 107 |
self._refresh_header()
|
| 108 |
+
msg = i18n("API密钥更改为了") + f"{hide_middle_chars(self.api_key)}"
|
| 109 |
logging.info(msg)
|
| 110 |
return msg
|
| 111 |
|
|
|
|
| 197 |
try:
|
| 198 |
chunk = json.loads(chunk[6:])
|
| 199 |
except json.JSONDecodeError:
|
| 200 |
+
print(i18n("JSON解析错误,收到的内容: ") + f"{chunk}")
|
| 201 |
error_msg += chunk
|
| 202 |
continue
|
| 203 |
if chunk_length > 6 and "delta" in chunk["choices"][0]:
|
|
|
|
| 235 |
quantified = False
|
| 236 |
if "int4" in model_name:
|
| 237 |
quantified = True
|
| 238 |
+
model = AutoModel.from_pretrained(
|
|
|
|
| 239 |
model_source, trust_remote_code=True
|
| 240 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 241 |
if torch.cuda.is_available():
|
| 242 |
# run on CUDA
|
| 243 |
logging.info("CUDA is available, using CUDA")
|
| 244 |
+
model = model.half().cuda()
|
| 245 |
# mps加速还存在一些问题,暂时不使用
|
| 246 |
elif system_name == "Darwin" and model_path is not None and not quantified:
|
| 247 |
logging.info("Running on macOS, using MPS")
|
| 248 |
# running on macOS and model already downloaded
|
| 249 |
+
model = model.half().to("mps")
|
| 250 |
else:
|
| 251 |
logging.info("GPU is not available, using CPU")
|
| 252 |
+
model = model.float()
|
| 253 |
model = model.eval()
|
| 254 |
CHATGLM_MODEL = model
|
| 255 |
|
|
|
|
| 479 |
"data": question
|
| 480 |
}
|
| 481 |
response = requests.post(self.url, json=data)
|
| 482 |
+
try:
|
| 483 |
+
response = json.loads(response.text)
|
| 484 |
+
return response["data"], len(response["data"])
|
| 485 |
+
except Exception as e:
|
| 486 |
+
return response.text, len(response.text)
|
| 487 |
|
| 488 |
|
| 489 |
|
|
|
|
| 496 |
top_p=None,
|
| 497 |
system_prompt=None,
|
| 498 |
) -> BaseLLMModel:
|
| 499 |
+
msg = i18n("模型设置为了:") + f" {model_name}"
|
| 500 |
model_type = ModelType.get_type(model_name)
|
| 501 |
lora_selector_visibility = False
|
| 502 |
lora_choices = []
|