Update handler.py

handler.py CHANGED (+120 -129)

Old version (removed lines are marked with "-"):

@@ -1,128 +1,83 @@
 import torch
 from typing import Dict, List, Any
-import json
-import os


 class EndpointHandler:
     def __init__(self, path=""):
         """
         Initialize the handler for PULSE-7B model.

         Args:
-            path: Path to the model directory
         """
-        print(

         # Set the device
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         print(f"Using device: {self.device}")

-
-
-
-        with open(config_path, 'r') as f:
-            config_data = json.load(f)

-
-
-

-
-
-
-
-
-
-
-            json.dump(config_data, f)
-
-        # Load as a Llama model
-        from transformers import LlamaForCausalLM, LlamaTokenizer, AutoTokenizer
-
-        try:
-            # Load the tokenizer
-            print("Loading tokenizer...")
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                path,
-                use_fast=False,
-                trust_remote_code=True
-            )
-
-            # Load the model as Llama
-            print("Loading model as Llama...")
-            self.model = LlamaForCausalLM.from_pretrained(
-                path,
-                config=temp_config_path,
-                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                device_map="auto",
-                low_cpu_mem_usage=True,
-                ignore_mismatched_sizes=True
-            )
-
-            # Delete the temporary config
-            if os.path.exists(temp_config_path):
-                os.remove(temp_config_path)
-
-        except Exception as e:
-            print(f"Llama loading failed: {e}")
-            # Simplest method: use AutoModel
-            from transformers import AutoModel, AutoTokenizer
-
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                path,
-                trust_remote_code=True
-            )
-
-            self.model = AutoModel.from_pretrained(
-                path,
-                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                device_map="auto",
-                trust_remote_code=True,
-                ignore_mismatched_sizes=True
-            )
-        else:
-            # Standard loading
-            from transformers import AutoModelForCausalLM, AutoTokenizer

             self.tokenizer = AutoTokenizer.from_pretrained(
-
                 trust_remote_code=True
             )

-
-
                 torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                 device_map="auto",
-
-
             )
         else:
-
-            print("Config not found, trying direct loading...")
-            from transformers import AutoModelForCausalLM, AutoTokenizer
-
-            self.tokenizer = AutoTokenizer.from_pretrained(
-                "PULSE-ECG/PULSE-7B",
-                trust_remote_code=True
-            )
-
-            self.model = AutoModelForCausalLM.from_pretrained(
-                "PULSE-ECG/PULSE-7B",
-                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
-                device_map="auto",
-                trust_remote_code=True,
-                ignore_mismatched_sizes=True
-            )
-
-        # Set the padding token
-        if not hasattr(self.tokenizer, 'pad_token') or self.tokenizer.pad_token is None:
-            self.tokenizer.pad_token = self.tokenizer.eos_token
-            self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
-
-        # Put the model in eval mode
-        self.model.eval()
-        print("Handler initialization complete!")

     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
@@ -134,6 +89,13 @@ class EndpointHandler:
         Returns:
             List containing the generated response
         """
         try:
             # Get the inputs
             inputs = data.get("inputs", "")
@@ -145,46 +107,75 @@
             if not text:
                 return [{"generated_text": "Please provide an input text."}]

-            # Get the parameters
             parameters = data.get("parameters", {})
-            max_new_tokens = min(parameters.get("max_new_tokens",
             temperature = parameters.get("temperature", 0.7)
             do_sample = parameters.get("do_sample", True)

-            #
-
-
-
-                truncation=True,
-                max_length=1024
-            )
-
-            input_ids = encoded["input_ids"].to(self.device)
-
-            # Generate
-            with torch.no_grad():
-                outputs = self.model.generate(
-                    input_ids,
                     max_new_tokens=max_new_tokens,
-                    temperature=temperature
                     do_sample=do_sample,
-
-
                 )

-            #
-
-
-
-
-
-
-
-
-

         except Exception as e:
             error_msg = f"Error during generation: {str(e)}"
             print(error_msg)
-            return [{
New version (added lines are marked with "+"):

 import torch
 from typing import Dict, List, Any


 class EndpointHandler:
     def __init__(self, path=""):
         """
         Initialize the handler for PULSE-7B model.
+        Direct reference to the original model.

         Args:
+            path: Path to the model directory (not used; the model is loaded from the HF Hub)
         """
+        print("Initializing PULSE-7B handler...")

         # Set the device
         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         print(f"Using device: {self.device}")

+        try:
+            # Use a pipeline - the simplest and most reliable method
+            from transformers import pipeline

+            print("Loading model from HuggingFace Hub...")
+            self.pipe = pipeline(
+                "text-generation",
+                model="PULSE-ECG/PULSE-7B",
+                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
+                device=0 if torch.cuda.is_available() else -1,
+                trust_remote_code=True,
+                model_kwargs={
+                    "low_cpu_mem_usage": True,
+                    "use_safetensors": True
+                }
+            )
+            print("Model loaded successfully via pipeline!")

+        except Exception as e:
+            print(f"Pipeline loading failed: {e}")
+            print("Trying alternative loading method...")
+
+            try:
+                # Alternative: load the model and tokenizer separately
+                from transformers import AutoTokenizer, LlamaForCausalLM

+                # Load the tokenizer
+                print("Loading tokenizer...")
                 self.tokenizer = AutoTokenizer.from_pretrained(
+                    "PULSE-ECG/PULSE-7B",
                     trust_remote_code=True
                 )

+                # Load the model as Llama
+                print("Loading model as Llama...")
+                self.model = LlamaForCausalLM.from_pretrained(
+                    "PULSE-ECG/PULSE-7B",
                     torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                     device_map="auto",
+                    low_cpu_mem_usage=True,
+                    trust_remote_code=True
                 )
+
+                # Set the padding token
+                if self.tokenizer.pad_token is None:
+                    self.tokenizer.pad_token = self.tokenizer.eos_token
+                    self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
+
+                self.model.eval()
+                self.use_pipeline = False
+                print("Model loaded successfully via direct loading!")
+
+            except Exception as e2:
+                print(f"Alternative loading also failed: {e2}")
+                # Last resort: a simple fallback message
+                self.pipe = None
+                self.model = None
+                self.tokenizer = None
+                self.use_pipeline = None
         else:
+            self.use_pipeline = True

     def __call__(self, data: Dict[str, Any]) -> List[Dict[str, Any]]:
         """
 ...

         Returns:
             List containing the generated response
         """
+        # If the model could not be loaded, return an error
+        if self.use_pipeline is None:
+            return [{
+                "generated_text": "Model could not be loaded. Please check the deployment configuration.",
+                "error": "Model initialization failed"
+            }]
+
         try:
             # Get the inputs
             inputs = data.get("inputs", "")

 ...

             if not text:
                 return [{"generated_text": "Please provide an input text."}]

+            # Get the parameters
             parameters = data.get("parameters", {})
+            max_new_tokens = min(parameters.get("max_new_tokens", 256), 1024)
             temperature = parameters.get("temperature", 0.7)
+            top_p = parameters.get("top_p", 0.95)
             do_sample = parameters.get("do_sample", True)
+            repetition_penalty = parameters.get("repetition_penalty", 1.0)

+            # If we are using the pipeline
+            if self.use_pipeline:
+                result = self.pipe(
+                    text,
                     max_new_tokens=max_new_tokens,
+                    temperature=temperature,
+                    top_p=top_p,
                     do_sample=do_sample,
+                    repetition_penalty=repetition_penalty,
+                    return_full_text=False  # Return only the newly generated text
                 )
+
+                # The pipeline returns a list
+                if isinstance(result, list) and len(result) > 0:
+                    return [{"generated_text": result[0].get("generated_text", "")}]
+                else:
+                    return [{"generated_text": str(result)}]

+            # If we are using manual generation
+            else:
+                # Tokenize
+                encoded = self.tokenizer(
+                    text,
+                    return_tensors="pt",
+                    truncation=True,
+                    max_length=2048
+                )
+
+                input_ids = encoded["input_ids"].to(self.device)
+                attention_mask = encoded.get("attention_mask")
+                if attention_mask is not None:
+                    attention_mask = attention_mask.to(self.device)
+
+                # Generate
+                with torch.no_grad():
+                    outputs = self.model.generate(
+                        input_ids,
+                        attention_mask=attention_mask,
+                        max_new_tokens=max_new_tokens,
+                        temperature=temperature,
+                        top_p=top_p,
+                        do_sample=do_sample,
+                        repetition_penalty=repetition_penalty,
+                        pad_token_id=self.tokenizer.pad_token_id,
+                        eos_token_id=self.tokenizer.eos_token_id
+                    )
+
+                # Decode - take only the new tokens
+                generated_ids = outputs[0][input_ids.shape[-1]:]
+                generated_text = self.tokenizer.decode(
+                    generated_ids,
+                    skip_special_tokens=True,
+                    clean_up_tokenization_spaces=True
+                )
+
+                return [{"generated_text": generated_text}]

         except Exception as e:
             error_msg = f"Error during generation: {str(e)}"
             print(error_msg)
+            return [{
+                "generated_text": "",
+                "error": error_msg
+            }]
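
For reference, a minimal local smoke test of the updated handler might look like the sketch below. It is not part of the commit: it assumes the file is saved as handler.py in the working directory, that the PULSE-ECG/PULSE-7B weights can be downloaded from the Hub, and the prompt text and parameter values are only illustrative. The payload shape mirrors what a Hugging Face Inference Endpoint would POST to the handler.

# local_test.py - hypothetical smoke test, not part of this change
from handler import EndpointHandler

# path is unused by the new handler; the model is pulled from the Hub
handler = EndpointHandler(path="")

payload = {
    "inputs": "Summarize the key features of a normal sinus rhythm ECG.",
    "parameters": {
        "max_new_tokens": 128,
        "temperature": 0.7,
        "top_p": 0.95,
        "do_sample": True,
    },
}

result = handler(payload)
# Prints the generated text, or the error message if initialization/generation failed
print(result[0].get("generated_text") or result[0].get("error"))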