AI_Beta

Sleeping

App Files Files

JohnSmith9982 committed on Apr 14, 2023

Commit

c21f04b

1 Parent(s): f3b0f10

Upload models.py

Browse files

Files changed (1) hide show

modules/models.py +16 -17

modules/models.py CHANGED Viewed

@@ -84,9 +84,9 @@ class OpenAIClient(BaseLLMModel):
                 usage_data = self._get_billing_data(usage_url)
             except Exception as e:
                 logging.error(f"获取API使用情况失败:" + str(e))
-                return f"**获取API使用情况失败**"
             rounded_usage = "{:.5f}".format(usage_data["total_usage"] / 100)
-            return f"**本月使用金额** \u3000 ${rounded_usage}"
         except requests.exceptions.ConnectTimeout:
             status_text = (
                 STANDARD_ERROR_MSG + CONNECTION_TIMEOUT_MSG + ERROR_RETRIEVE_MSG
@@ -96,7 +96,7 @@ class OpenAIClient(BaseLLMModel):
             status_text = STANDARD_ERROR_MSG + READ_TIMEOUT_MSG + ERROR_RETRIEVE_MSG
             return status_text
         except Exception as e:
-            logging.error(f"获取API使用情况失败:" + str(e))
             return STANDARD_ERROR_MSG + ERROR_RETRIEVE_MSG
     def set_token_upper_limit(self, new_upper_limit):
@@ -105,7 +105,7 @@ class OpenAIClient(BaseLLMModel):
     def set_key(self, new_access_key):
         self.api_key = new_access_key.strip()
         self._refresh_header()
-        msg = f"API密钥更改为了{hide_middle_chars(self.api_key)}"
         logging.info(msg)
         return msg
@@ -197,7 +197,7 @@ class OpenAIClient(BaseLLMModel):
                 try:
                     chunk = json.loads(chunk[6:])
                 except json.JSONDecodeError:
-                    print(f"JSON解析错误,收到的内容: {chunk}")
                     error_msg += chunk
                     continue
                 if chunk_length > 6 and "delta" in chunk["choices"][0]:
@@ -235,25 +235,21 @@ class ChatGLM_Client(BaseLLMModel):
             quantified = False
             if "int4" in model_name:
                 quantified = True
-            if quantified:
-                model = AutoModel.from_pretrained(
                     model_source, trust_remote_code=True
-                ).half()
-            else:
-                model = AutoModel.from_pretrained(
-                    model_source, trust_remote_code=True
-                ).half()
             if torch.cuda.is_available():
                 # run on CUDA
                 logging.info("CUDA is available, using CUDA")
-                model = model.cuda()
             # mps加速还存在一些问题，暂时不使用
             elif system_name == "Darwin" and model_path is not None and not quantified:
                 logging.info("Running on macOS, using MPS")
                 # running on macOS and model already downloaded
-                model = model.to("mps")
             else:
                 logging.info("GPU is not available, using CPU")
             model = model.eval()
             CHATGLM_MODEL = model
@@ -483,8 +479,11 @@ class XMBot_Client(BaseLLMModel):
             "data": question
         }
         response = requests.post(self.url, json=data)
-        response = json.loads(response.text)
-        return response["data"], len(response["data"])
@@ -497,7 +496,7 @@ def get_model(
     top_p=None,
     system_prompt=None,
 ) -> BaseLLMModel:
-    msg = f"模型设置为了： {model_name}"
     model_type = ModelType.get_type(model_name)
     lora_selector_visibility = False
     lora_choices = []

                 usage_data = self._get_billing_data(usage_url)
             except Exception as e:
                 logging.error(f"获取API使用情况失败:" + str(e))
+                return i18n("**获取API使用情况失败**")
             rounded_usage = "{:.5f}".format(usage_data["total_usage"] / 100)
+            return i18n("**本月使用金额** ") + f"\u3000 ${rounded_usage}"
         except requests.exceptions.ConnectTimeout:
             status_text = (
                 STANDARD_ERROR_MSG + CONNECTION_TIMEOUT_MSG + ERROR_RETRIEVE_MSG
             status_text = STANDARD_ERROR_MSG + READ_TIMEOUT_MSG + ERROR_RETRIEVE_MSG
             return status_text
         except Exception as e:
+            logging.error(i18n("获取API使用情况失败:") + str(e))
             return STANDARD_ERROR_MSG + ERROR_RETRIEVE_MSG
     def set_token_upper_limit(self, new_upper_limit):
     def set_key(self, new_access_key):
         self.api_key = new_access_key.strip()
         self._refresh_header()
+        msg = i18n("API密钥更改为了") + f"{hide_middle_chars(self.api_key)}"
         logging.info(msg)
         return msg
                 try:
                     chunk = json.loads(chunk[6:])
                 except json.JSONDecodeError:
+                    print(i18n("JSON解析错误,收到的内容: ") + f"{chunk}")
                     error_msg += chunk
                     continue
                 if chunk_length > 6 and "delta" in chunk["choices"][0]:
             quantified = False
             if "int4" in model_name:
                 quantified = True
+            model = AutoModel.from_pretrained(
                     model_source, trust_remote_code=True
+                )
             if torch.cuda.is_available():
                 # run on CUDA
                 logging.info("CUDA is available, using CUDA")
+                model = model.half().cuda()
             # mps加速还存在一些问题，暂时不使用
             elif system_name == "Darwin" and model_path is not None and not quantified:
                 logging.info("Running on macOS, using MPS")
                 # running on macOS and model already downloaded
+                model = model.half().to("mps")
             else:
                 logging.info("GPU is not available, using CPU")
+                model = model.float()
             model = model.eval()
             CHATGLM_MODEL = model
             "data": question
         }
         response = requests.post(self.url, json=data)
+        try:
+            response = json.loads(response.text)
+            return response["data"], len(response["data"])
+        except Exception as e:
+            return response.text, len(response.text)
     top_p=None,
     system_prompt=None,
 ) -> BaseLLMModel:
+    msg = i18n("模型设置为了：") + f" {model_name}"
     model_type = ModelType.get_type(model_name)
     lora_selector_visibility = False
     lora_choices = []