make pipeline work

Files changed (6) hide show

.gitignore +1 -0
.python-version +1 -0
pipeline/pipeline.py +107 -0
pipeline/pipeline_test.ipynb +0 -0
pyproject.toml +12 -0
uv.lock +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1 @@


1	+ .vscode

.python-version ADDED Viewed

	@@ -0,0 +1 @@


1	+ 3.11

pipeline/pipeline.py ADDED Viewed

	@@ -0,0 +1,107 @@

+from transformers import Pipeline
+from snac import SNAC
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer
+class MyPipeline(Pipeline):
+    def __init__(self):
+        self.snac_model = SNAC.from_pretrained("hubertsiuzdak/snac_24khz")
+        self.snac_model = self.snac_model.to("cpu")
+        print(
+            "We have loaded the tokeniser/detokeniser model to the cpu, to use vram - use the gpu for faster inference"
+        )
+        tokeniser_name = "meta-llama/Llama-3.2-3B-Instruct"
+        model_name = "cubbk/orpheus-swedish"
+        self.model = AutoModelForCausalLM.from_pretrained(
+            model_name, torch_dtype=torch.bfloat16
+        )
+        self.model.cuda()
+        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+    def _sanitize_parameters(self, **kwargs):
+        return {}, {}, {}
+    def preprocess(self, inputs, args=2):
+        all_input_ids = []
+        for prompt in inputs:
+            input_ids = self.tokenizer(prompt, return_tensors="pt").input_ids
+            all_input_ids.append(input_ids)
+        start_token = torch.tensor([[128259]], dtype=torch.int64)  # Start of human
+        end_tokens = torch.tensor(
+            [[128009, 128260]], dtype=torch.int64
+        )  # End of text, End of human
+        all_modified_input_ids = []
+        for input_ids in all_input_ids:
+            modified_input_ids = torch.cat(
+                [start_token, input_ids, end_tokens], dim=1
+            )  # SOH SOT Text EOT EOH
+            all_modified_input_ids.append(modified_input_ids)
+        all_padded_tensors = []
+        all_attention_masks = []
+        max_length = max(
+            [
+                modified_input_ids.shape[1]
+                for modified_input_ids in all_modified_input_ids
+            ]
+        )
+        for modified_input_ids in all_modified_input_ids:
+            padding = max_length - modified_input_ids.shape[1]
+            padded_tensor = torch.cat(
+                [
+                    torch.full((1, padding), 128263, dtype=torch.int64),
+                    modified_input_ids,
+                ],
+                dim=1,
+            )
+            attention_mask = torch.cat(
+                [
+                    torch.zeros((1, padding), dtype=torch.int64),
+                    torch.ones((1, modified_input_ids.shape[1]), dtype=torch.int64),
+                ],
+                dim=1,
+            )
+            all_padded_tensors.append(padded_tensor)
+            all_attention_masks.append(attention_mask)
+        all_padded_tensors = torch.cat(all_padded_tensors, dim=0)
+        all_attention_masks = torch.cat(all_attention_masks, dim=0)
+        input_ids = all_padded_tensors.to("cuda")
+        attention_mask = all_attention_masks.to("cuda")
+        return {"input_ids": input_ids, "attention_mask": attention_mask}
+    def _forward(self, model_inputs):
+        input_ids = model_inputs["input_ids"]
+        attention_mask = model_inputs["attention_mask"]
+        with torch.no_grad():
+            generated_ids = self.model.generate(
+                input_ids=input_ids,
+                attention_mask=attention_mask,
+                max_new_tokens=1200,
+                do_sample=True,
+                temperature=0.6,
+                top_p=0.95,
+                repetition_penalty=1.1,
+                num_return_sequences=1,
+                eos_token_id=128258,
+            )
+        return generated_ids
+    def postprocess(self, model_outputs):
+        return model_outputs
+if __name__ == "__main__":
+    pipe = MyPipeline()
+    prompt = "Hej, hur mår du?"
+    outputs = pipe(prompt)
+    print(outputs)

pipeline/pipeline_test.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

pyproject.toml ADDED Viewed

	@@ -0,0 +1,12 @@

+[project]
+name = "orpheus-swedish"
+version = "0.1.0"
+description = "Swedish text-to-speech inference pipeline built on the orpheus-swedish model and the SNAC codec"
+readme = "README.md"
+requires-python = ">=3.11"
+dependencies = [
+    "numpy>=2.3.3",
+    "snac>=1.2.1",
+    "torch>=2.8.0",
+    "transformers>=4.56.1",
+]

uv.lock ADDED Viewed

The diff for this file is too large to render. See raw diff