WildnerveAI committed
Commit 2e70e8c · verified · 1 Parent(s): 79fb938

Upload handler.py

Files changed (1): handler.py (+64 −59)
handler.py CHANGED
@@ -2,65 +2,70 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import os
 
-# Global model and tokenizer instances
-model = None
-tokenizer = None
-
-def load_model():
-    """Initialize the model and tokenizer once"""
-    global model, tokenizer
-
-    # Model repository ID
-    model_id = "EvolphTech/Wildnerve-tlm01_Hybrid_Model"
-
-    # Get token from environment (if needed for private models)
-    hf_token = os.environ.get("HF_TOKEN")
-
-    # Load model and tokenizer
-    model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)
-    tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
-
-    # Move to GPU if available
-    if torch.cuda.is_available():
-        model = model.to("cuda")
-
-    print("Model and tokenizer loaded successfully!")
-
-def predict(inputs, parameters=None):
-    """Primary inference function for the model"""
-    global model, tokenizer
-
-    # Load model if not already loaded
-    if model is None or tokenizer is None:
-        load_model()
-
-    # Default parameters
-    max_length = parameters.get("max_length", 100) if parameters else 100
-    temperature = parameters.get("temperature", 0.7) if parameters else 0.7
-
-    # Process the input text
-    if isinstance(inputs, str):
-        text_input = inputs
-    elif isinstance(inputs, dict) and "inputs" in inputs:
-        text_input = inputs["inputs"]
-    else:
-        text_input = str(inputs)
-
-    # Tokenize inputs
-    inputs = tokenizer(text_input, return_tensors="pt")
-    input_ids = inputs["input_ids"].to(model.device)
-
-    # Generate text
-    with torch.no_grad():
-        outputs = model.generate(
-            input_ids,
-            max_length=max_length,
-            temperature=temperature,
-            do_sample=temperature > 0,
-            pad_token_id=tokenizer.eos_token_id
-        )
-
-    # Decode and return the generated text
-    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
-
-    return {"generated_text": generated_text}
+# Create the required EndpointHandler class for Hugging Face Inference Endpoints
+class EndpointHandler:
+    def __init__(self, path=""):
+        # Initialize model and tokenizer as None - they are loaded on the first request
+        self.model = None
+        self.tokenizer = None
+        self.path = path
+        print(f"Initializing EndpointHandler with path: {path}")
+
+    def __call__(self, data, parameters=None):
+        # Called by the endpoint runtime for every incoming request
+        return self.predict(data, parameters)
+
+    def load_model(self):
+        """Initialize the model and tokenizer once"""
+        # Model repository ID
+        model_id = "EvolphTech/Wildnerve-tlm01_Hybrid_Model"
+
+        # Get token from environment (if needed for private models)
+        hf_token = os.environ.get("HF_TOKEN")
+
+        # Load model and tokenizer
+        self.model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)
+        self.tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token)
+
+        # Move to GPU if available
+        if torch.cuda.is_available():
+            self.model = self.model.to("cuda")
+
+        print("Model and tokenizer loaded successfully!")
+
+    def predict(self, inputs, parameters=None):
+        """Primary inference function for the model"""
+        # Load model if not already loaded
+        if self.model is None or self.tokenizer is None:
+            self.load_model()
+
+        # Process the input text; Inference Endpoints sends a dict like
+        # {"inputs": ..., "parameters": {...}}
+        if isinstance(inputs, str):
+            text_input = inputs
+        elif isinstance(inputs, dict) and "inputs" in inputs:
+            text_input = inputs["inputs"]
+            # Also honor parameters embedded in the request payload,
+            # which were previously dropped
+            if parameters is None:
+                parameters = inputs.get("parameters")
+        else:
+            text_input = str(inputs)
+
+        # Default generation parameters
+        max_length = parameters.get("max_length", 100) if parameters else 100
+        temperature = parameters.get("temperature", 0.7) if parameters else 0.7
+
+        # Tokenize the prompt (avoid shadowing the `inputs` argument)
+        encoded = self.tokenizer(text_input, return_tensors="pt")
+        input_ids = encoded["input_ids"].to(self.model.device)
+
+        # Generate text
+        with torch.no_grad():
+            outputs = self.model.generate(
+                input_ids,
+                max_length=max_length,
+                temperature=temperature,
+                do_sample=temperature > 0,
+                pad_token_id=self.tokenizer.eos_token_id
+            )
+
+        # Decode and return the generated text
+        generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+        return {"generated_text": generated_text}
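
For reference, a minimal local smoke test of the new handler could look like the sketch below. The payload shape ({"inputs": ..., "parameters": {...}}) follows the Hugging Face Inference Endpoints convention for custom handlers, which instantiate EndpointHandler(path) once at startup and then call it with the parsed JSON body of each request. The prompt text, parameter values, and path="." are illustrative assumptions, not part of the commit.

# Minimal local smoke test (illustrative sketch, not part of the commit).
# In production, `path` points at the checked-out model repository.
from handler import EndpointHandler

handler = EndpointHandler(path=".")
payload = {
    "inputs": "Hello, world!",
    "parameters": {"max_length": 50, "temperature": 0.7},
}
result = handler(payload)
print(result["generated_text"])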