homer7676
/

FrierenChatbotV1

@@ -1,63 +1,62 @@
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from typing import Dict, Any
-import re
 class EndpointHandler:
-    def __init__(self, model_dir: str = None):
-        self.model_dir = model_dir
-        self.device = "cuda" if torch.cuda.is_available() else "cpu"
-        self.model = None
         self.tokenizer = None
-    def initialize(self, context: Dict[str, Any] = None):
-        """Initialize the model and tokenizer."""
-        model_id = "homer7676/FrierenChatbotV1"
-        # Initialize tokenizer
         self.tokenizer = AutoTokenizer.from_pretrained(
-            model_id,
-            trust_remote_code=True,
-            padding_side="left"
         )
-        # Ensure pad token exists
-        if self.tokenizer.pad_token is None:
-            self.tokenizer.pad_token = self.tokenizer.eos_token
-        # Initialize model
         self.model = AutoModelForCausalLM.from_pretrained(
-            model_id,
             trust_remote_code=True,
-            torch_dtype="auto",
-            low_cpu_mem_usage=True
         ).to(self.device)
         self.model.eval()
-        return self
-    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
-        """Main prediction pipeline."""
-        inputs = self.preprocess(data)
-        outputs = self.inference(inputs)
-        return self.postprocess(outputs)
     def preprocess(self, data: Dict[str, Any]) -> Dict[str, Any]:
-        """Preprocess the input data."""
-        if isinstance(data, str):
-            return {"message": data}
         inputs = data.pop("inputs", data)
-        return inputs if isinstance(inputs, dict) else {"message": inputs}
     def inference(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
-        """Run the inference."""
         try:
-            # 準備輸入
             message = inputs.get("message", "")
             context = inputs.get("context", "")
-            prompt = self._build_prompt(context, message)
-            # Tokenize
             inputs = self.tokenizer(
                 prompt,
                 return_tensors="pt",
@@ -66,72 +65,28 @@ class EndpointHandler:
                 max_length=2048
             ).to(self.device)
-            # Generate
             with torch.no_grad():
-                generation_output = self.model.generate(
-                    input_ids=inputs["input_ids"],
-                    attention_mask=inputs["attention_mask"],
                     max_new_tokens=256,
                     temperature=0.7,
                     top_p=0.9,
                     top_k=50,
                     do_sample=True,
                     pad_token_id=self.tokenizer.pad_token_id,
-                    eos_token_id=self.tokenizer.eos_token_id,
-                    repetition_penalty=1.2
                 )
-            response = self.tokenizer.decode(
-                generation_output[0],
-                skip_special_tokens=True
-            )
-            # 處理回應
             response = response.split("芙莉蓮：")[-1].strip()
-            response = self._process_response(response)
-            return {"response": response}
-        except Exception as e:
-            return {"error": f"Inference error: {str(e)}"}
-    def _build_prompt(self, context: str, query: str) -> str:
-        """Build the prompt for the model."""
-        return f"""你是芙莉蓮，需要遵守以下規則回答：
-1. 身份設定：
- - 千年精靈魔法師
- - 態度溫柔但帶著些許嘲諷
- - 說話優雅且有距離感
-2. 重要關係：
- - 弗蘭梅是我的師傅
- - 費倫是我的學生
- - 欣梅爾是我的摯友
- - 海塔是我的故友
-3. 回答規則：
- - 使用繁體中文
- - 必須提供具體詳細的內容
- - 保持回答的連貫性和完整性
-相關資訊：{context}
-用戶：{query}
-芙莉蓮："""
-    def _process_response(self, response: str) -> str:
-        """Process the model's response."""
-        if not response or not response.strip():
-            return "抱歉，我現在有點恍神，請你再問一次好嗎？"
-        # Convert to traditional Chinese
-        for simplified, traditional in SIMPLIFIED_TO_TRADITIONAL.items():
-            response = response.replace(simplified, traditional)
-        # Clean up whitespace
-        response = re.sub(r'\s+', '', response)
-        # Add ending punctuation if needed
-        if not response.endswith(('。', '！', '？', '~', '呢', '啊', '吶')):
-            response += '呢。'
-        return response
     def postprocess(self, data: Dict[str, Any]) -> Dict[str, Any]:
-        """Postprocess the output data."""
         return data

 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 from typing import Dict, Any
 class EndpointHandler:
+    def __init__(self):
         self.tokenizer = None
+        self.model = None
+        self.device = "cuda" if torch.cuda.is_available() else "cpu"
+    def __call__(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        """使 handler 可調用"""
+        inputs = self.preprocess(data)
+        outputs = self.inference(inputs)
+        return self.postprocess(outputs)
+    def initialize(self, context):
+        """初始化模型和 tokenizer"""
         self.tokenizer = AutoTokenizer.from_pretrained(
+            "homer7676/FrierenChatbotV1",
+            trust_remote_code=True
         )
         self.model = AutoModelForCausalLM.from_pretrained(
+            "homer7676/FrierenChatbotV1",
             trust_remote_code=True,
+            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
         ).to(self.device)
         self.model.eval()
     def preprocess(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        """預處理輸入數據"""
         inputs = data.pop("inputs", data)
+        if not isinstance(inputs, dict):
+            inputs = {"message": inputs}
+        return inputs
     def inference(self, inputs: Dict[str, Any]) -> Dict[str, Any]:
+        """執行推理"""
         try:
             message = inputs.get("message", "")
             context = inputs.get("context", "")
+            prompt = f"""你是芙莉蓮，需要遵守以下規則回答：
+1. 身份設定：
+ - 千年精靈魔法師
+ - 態度溫柔但帶著些許嘲諷
+ - 說話優雅且有距離感
+2. 重要關係：
+ - 弗蘭梅是我的師傅
+ - 費倫是我的學生
+ - 欣梅爾是我的摯友
+ - 海塔是我的故友
+3. 回答規則：
+ - 使用繁體中文
+ - 必須提供具體詳細的內容
+ - 保持回答的連貫性和完整性
+相關資訊：{context}
+用戶：{message}
+芙莉蓮："""
             inputs = self.tokenizer(
                 prompt,
                 return_tensors="pt",
                 max_length=2048
             ).to(self.device)
             with torch.no_grad():
+                outputs = self.model.generate(
+                    **inputs,
                     max_new_tokens=256,
                     temperature=0.7,
                     top_p=0.9,
                     top_k=50,
                     do_sample=True,
+                    repetition_penalty=1.2,
                     pad_token_id=self.tokenizer.pad_token_id,
+                    eos_token_id=self.tokenizer.eos_token_id
                 )
+            response = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
             response = response.split("芙莉蓮：")[-1].strip()
+            return {"generated_text": response}
+        except Exception as e:
+            print(f"推理過程錯誤: {str(e)}")
+            return {"error": str(e)}
     def postprocess(self, data: Dict[str, Any]) -> Dict[str, Any]:
+        """後處理輸出數據"""
         return data