handraise-dev
/

gguf-inference

Text Generation

Model card | Files and versions

syberWolf committed on Jul 4, 2024

Commit

b47e2d8

·

1 Parent(s): 5d540d6

test qwen

Files changed (2) hide show

handler.py +5 -7
requirements.txt +0 -4

handler.py CHANGED Viewed

@@ -1,4 +1,3 @@
-import torch
 from typing import Dict, List, Any
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
@@ -6,13 +5,12 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 class EndpointHandler:
     def __init__(self, path=""):
         # load the model
-        tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct")
         model = AutoModelForCausalLM.from_pretrained(
-            "microsoft/Phi-3-mini-128k-instruct",
-            device_map="cuda",
             torch_dtype="auto",
-            trust_remote_code=True,
-        )
         # create inference pipeline
         self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
@@ -26,4 +24,4 @@ class EndpointHandler:
         else:
             prediction = self.pipeline(inputs)
         # postprocess the prediction
-        return prediction

 from typing import Dict, List, Any
 from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
 class EndpointHandler:
     def __init__(self, path=""):
         # load the model
+        tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")
         model = AutoModelForCausalLM.from_pretrained(
+            "Qwen/Qwen2-1.5B-Instruct",
             torch_dtype="auto",
+            device_map="auto"
+        )
         # create inference pipeline
         self.pipeline = pipeline("text-generation", model=model, tokenizer=tokenizer)
         else:
             prediction = self.pipeline(inputs)
         # postprocess the prediction
+        return prediction

requirements.txt DELETED Viewed

@@ -1,4 +0,0 @@
-flash_attn==2.5.8
-torch==2.3.1
-accelerate==0.31.0
-transformers==4.41.2