Dan committed on
Commit
fa4e6e4
·
1 Parent(s): 252757b

make pipeline work

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ .vscode
.python-version ADDED
@@ -0,0 +1 @@
 
 
1
+ 3.11
pipeline/pipeline.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import Pipeline
2
+ from snac import SNAC
3
+ import torch
4
+ from transformers import AutoModelForCausalLM, AutoTokenizer
5
+
6
+
7
class MyPipeline(Pipeline):
    """Swedish text-to-speech pipeline.

    Generates SNAC audio-codec token ids from text prompts using a
    fine-tuned Llama model ("cubbk/orpheus-swedish").  The SNAC codec
    model is loaded as well (on CPU) so the generated tokens can later be
    decoded to a waveform; `postprocess` currently returns the raw token
    ids without decoding them.
    """

    def __init__(self):
        # NOTE(review): Pipeline.__init__ is deliberately not called here, so
        # the base-class call machinery (framework, batching params) is never
        # initialized — confirm that pipe(prompt) actually routes through
        # preprocess/_forward/postprocess without it.
        self.snac_model = SNAC.from_pretrained("hubertsiuzdak/snac_24khz")
        self.snac_model = self.snac_model.to("cpu")
        print(
            "We have loaded the tokeniser/detokeniser model to the cpu, to use vram - use the gpu for faster inference"
        )

        model_name = "cubbk/orpheus-swedish"

        self.model = AutoModelForCausalLM.from_pretrained(
            model_name, torch_dtype=torch.bfloat16
        )
        # Generation runs on GPU; preprocess moves the inputs there too.
        self.model.cuda()
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)

    def _sanitize_parameters(self, **kwargs):
        # No extra call-time parameters are forwarded to
        # preprocess/_forward/postprocess.
        return {}, {}, {}

    def preprocess(self, inputs, args=2):
        """Tokenize prompt(s) and wrap them in the Orpheus speech-prompt frame.

        Accepts a single prompt string or a list of prompt strings.  Each
        prompt becomes [SOH] <text tokens> [EOT] [EOH]; prompts are then
        left-padded to a common length with pad token 128263.

        Returns a dict with CUDA tensors "input_ids" and "attention_mask".
        """
        # Bug fix: a bare string used to be iterated character by character
        # (the __main__ smoke test passes a single string); normalize to a
        # list of prompts first.
        if isinstance(inputs, str):
            inputs = [inputs]

        start_token = torch.tensor([[128259]], dtype=torch.int64)  # Start of human
        end_tokens = torch.tensor(
            [[128009, 128260]], dtype=torch.int64
        )  # End of text, End of human

        all_modified_input_ids = []
        for prompt in inputs:
            input_ids = self.tokenizer(prompt, return_tensors="pt").input_ids
            # SOH SOT Text EOT EOH
            all_modified_input_ids.append(
                torch.cat([start_token, input_ids, end_tokens], dim=1)
            )

        max_length = max(t.shape[1] for t in all_modified_input_ids)

        all_padded_tensors = []
        all_attention_masks = []
        for modified_input_ids in all_modified_input_ids:
            padding = max_length - modified_input_ids.shape[1]
            # Left-pad so generation starts immediately after the prompt.
            all_padded_tensors.append(
                torch.cat(
                    [
                        torch.full((1, padding), 128263, dtype=torch.int64),
                        modified_input_ids,
                    ],
                    dim=1,
                )
            )
            all_attention_masks.append(
                torch.cat(
                    [
                        torch.zeros((1, padding), dtype=torch.int64),
                        torch.ones(
                            (1, modified_input_ids.shape[1]), dtype=torch.int64
                        ),
                    ],
                    dim=1,
                )
            )

        input_ids = torch.cat(all_padded_tensors, dim=0).to("cuda")
        attention_mask = torch.cat(all_attention_masks, dim=0).to("cuda")

        return {"input_ids": input_ids, "attention_mask": attention_mask}

    def _forward(self, model_inputs):
        """Sample speech tokens with nucleus sampling; stops at token 128258."""
        with torch.no_grad():
            generated_ids = self.model.generate(
                input_ids=model_inputs["input_ids"],
                attention_mask=model_inputs["attention_mask"],
                max_new_tokens=1200,
                do_sample=True,
                temperature=0.6,
                top_p=0.95,
                repetition_penalty=1.1,
                num_return_sequences=1,
                eos_token_id=128258,  # presumably end-of-speech — confirm
            )
        return generated_ids

    def postprocess(self, model_outputs):
        # Raw generated token ids are returned as-is; SNAC decoding to audio
        # is not wired up yet.
        return model_outputs
103
if __name__ == "__main__":
    # Smoke test: synthesize a short Swedish greeting and dump the raw output.
    tts = MyPipeline()
    result = tts("Hej, hur mår du?")
    print(result)
pipeline/pipeline_test.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
pyproject.toml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "orpheus-swedish"
3
+ version = "0.1.0"
4
+ description = "Swedish text-to-speech pipeline (Orpheus + SNAC)"
5
+ readme = "README.md"
6
+ requires-python = ">=3.11"
7
+ dependencies = [
8
+ "numpy>=2.3.3",
9
+ "snac>=1.2.1",
10
+ "torch>=2.8.0",
11
+ "transformers>=4.56.1",
12
+ ]
uv.lock ADDED
The diff for this file is too large to render. See raw diff