rui3000 committed on
Commit
c85f9bb
·
verified ·
1 Parent(s): 94dec36

Create service.py

Browse files
Files changed (1) hide show
  1. service.py +73 -0
service.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import spaces
2
+ import torch
3
+ from transformers import AutoModelForCausalLM, AutoTokenizer
4
+
5
# Module-level state shared by the functions in this file.
# The checkpoint is a small, fast model chosen for testing.
_model_name = "microsoft/DialoGPT-small"
# Both caches start empty and are filled in lazily on first use.
_tokenizer = None
_model = None
9
+
10
def initialize_tokenizer():
    """Return the shared tokenizer, loading it on the first call.

    The tokenizer for ``_model_name`` is cached in the module-level
    ``_tokenizer`` variable, so later calls return the cached object
    without loading it again. If the loaded tokenizer defines no pad
    token, its EOS token is reused as the pad token.

    Returns:
        The cached tokenizer instance.
    """
    global _tokenizer

    # Fast path: already loaded by an earlier call.
    if _tokenizer is not None:
        return _tokenizer

    print("[MinimalService] Loading tokenizer...")
    _tokenizer = AutoTokenizer.from_pretrained(_model_name)
    if _tokenizer.pad_token is None:
        # No pad token defined for this tokenizer; fall back to EOS.
        _tokenizer.pad_token = _tokenizer.eos_token
    print("[MinimalService] Tokenizer loaded successfully.")
    return _tokenizer
20
+
21
@spaces.GPU
def generate_text_gpu(prompt: str, max_tokens: int = 50):
    """Generate a sampled continuation of ``prompt`` with the cached model.

    The tokenizer and model are loaded lazily on first use and cached in
    the module-level ``_tokenizer`` / ``_model`` variables.

    Args:
        prompt: Text to condition the generation on.
        max_tokens: Upper bound on the number of newly generated tokens
            (forwarded as ``max_new_tokens``).

    Returns:
        The decoded text of the first returned sequence with special
        tokens stripped. The whole sequence is decoded, so the result
        includes the prompt tokens followed by the generated ones.
    """
    global _model

    print("[MinimalService] GPU function called")

    # initialize_tokenizer() is a no-op after the first call and always
    # returns the cached tokenizer, so no separate None check is needed.
    tokenizer = initialize_tokenizer()

    # Load the model inside the GPU-decorated call.
    # NOTE(review): depending on how `spaces.GPU` executes this function,
    # the module-level cache may not persist between calls — confirm.
    if _model is None:
        print("[MinimalService] Loading model...")
        _model = AutoModelForCausalLM.from_pretrained(
            _model_name,
            torch_dtype=torch.float16,
            device_map="auto",
        )
        print("[MinimalService] Model loaded.")

    # Tokenize through __call__ (not .encode) so we also get the attention
    # mask. The pad token is aliased to EOS, so generate() cannot reliably
    # infer the mask from the input ids and would otherwise warn and guess.
    encoded = tokenizer(prompt, return_tensors="pt")
    device = next(_model.parameters()).device
    input_ids = encoded["input_ids"].to(device)
    attention_mask = encoded["attention_mask"].to(device)

    with torch.no_grad():
        outputs = _model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=max_tokens,
            temperature=0.7,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    return tokenizer.decode(outputs[0], skip_special_tokens=True)
58
+
59
class MinimalService:
    """Thin object wrapper around the module-level text generation function."""

    def __init__(self):
        """Announce start-up and load the tokenizer eagerly."""
        print("[MinimalService] Service initialized")
        # Load the tokenizer up front rather than on the first request.
        initialize_tokenizer()

    def generate(self, prompt: str, max_tokens: int = 50):
        """Generate text for ``prompt``.

        Args:
            prompt: Text to condition the generation on.
            max_tokens: Maximum number of new tokens to generate. Defaults
                to 50, the same default ``generate_text_gpu`` uses, so
                existing ``generate(prompt)`` callers are unaffected.

        Returns:
            Whatever ``generate_text_gpu`` returns for these arguments.
        """
        return generate_text_gpu(prompt, max_tokens)
68
+
69
# Module-level service instance, constructed at import time
# (constructing it also loads the tokenizer).
service = MinimalService()

# Log the name of the GPU-decorated function as a start-up confirmation.
print(f"[MinimalService] GPU function available: {generate_text_gpu.__name__}")