Elleres committed on
Commit
704ce4b
·
verified ·
1 Parent(s): 9965438

Create mixtral_server.py

Browse files
Files changed (1) hide show
  1. mixtral_server.py +14 -0
mixtral_server.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Minimal text-generation script for Mixtral-8x7B-Instruct.

Loads the model once at module import and exposes generate_response() for
single-prompt inference. fp16 weights with device_map="auto" still require
substantial GPU memory; loading also downloads the checkpoint on first run.
"""
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# Fix: the published Hub checkpoint is versioned. "mistralai/Mixtral-8x7B-Instruct"
# does not exist as a repo id, so from_pretrained() would raise at load time.
model_name = "mistralai/Mixtral-8x7B-Instruct-v0.1"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# device_map="auto" lets accelerate shard the model across available devices;
# float16 halves the memory footprint versus the default dtype.
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype=torch.float16, device_map="auto"
)


def generate_response(prompt: str, max_new_tokens: int = 500) -> str:
    """Generate a completion for *prompt* and return the decoded text.

    Args:
        prompt: Input text to complete.
        max_new_tokens: Upper bound on *generated* tokens. (The original code
            used max_length=500, which counts the prompt tokens as well and
            can silently truncate generation for long prompts.)

    Returns:
        The full decoded sequence (prompt plus continuation) with special
        tokens stripped.
    """
    # Move inputs to wherever the model's first shard lives instead of
    # hard-coding "cuda": with device_map="auto" that may be a different
    # device (or CPU on machines without a GPU).
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)


if __name__ == "__main__":
    # Smoke test (Portuguese: "Hello, how can I help you?").
    # Guarded so importing this module does not trigger generation.
    print(generate_response("Olá, como posso te ajudar?"))