AkramOM606 committed on
Commit
d688f9a
·
verified ·
1 Parent(s): 0a16832

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -0
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Demo script: chat with Meta-Llama-3-8B-Instruct as a pirate chatbot.

Loads the instruction-tuned Llama-3 8B model via the transformers
text-generation pipeline, renders a two-message chat with the model's
chat template, and prints the sampled completion.
"""

import transformers
import torch
import os

# --- Environment diagnostics ------------------------------------------------
# Report GPU, FlashAttention, and torch build info up front so failures in the
# heavyweight model load below are easier to diagnose.
if torch.cuda.is_available():
    print(f"Device name: {torch.cuda.get_device_properties('cuda').name}")
else:
    # Original crashed here on CPU-only hosts; degrade gracefully instead.
    print("Device name: CUDA not available (running on CPU)")
print(f"FlashAttention available: {torch.backends.cuda.flash_sdp_enabled()}")
# Fix: torch.version is a submodule (printing it shows a module repr);
# torch.__version__ is the actual version string.
print(f"torch version: {torch.__version__}")

# Instruction-tuned Llama-3 8B. Gated on the Hub: requires accepting Meta's
# license and an authenticated HF token in the environment.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},  # bf16 halves memory vs fp32
    device_map="auto",  # let accelerate place weights on available devices
)

# Chat history in the role/content format expected by apply_chat_template.
messages = [
    {
        "role": "system",
        "content": "You are a pirate chatbot who always responds in pirate speak!",
    },
    {"role": "user", "content": "Who are you?"},
]

# Render the chat into Llama-3's prompt format. add_generation_prompt=True
# appends the assistant header so the model continues as the assistant.
prompt = pipeline.tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# Llama-3 ends assistant turns with <|eot_id|> rather than only the plain EOS
# token, so both must be treated as stop tokens.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

outputs = pipeline(
    prompt,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,  # sample (temperature + nucleus) for varied pirate speak
    temperature=0.6,
    top_p=0.9,
)
# The pipeline returns prompt + completion; slice off the prompt text so only
# the model's reply is printed.
print(outputs[0]["generated_text"][len(prompt):])