AkramOM606 committed on
Commit
d688f9a
·
verified ·
1 Parent(s): 0a16832

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +52 -0
app.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Demo script: chat with Meta-Llama-3-8B-Instruct as a pirate chatbot.

Loads the instruction-tuned Llama-3 8B model via the transformers
text-generation pipeline, renders a two-message chat with the model's
chat template, and prints the sampled completion.
"""

import transformers
import torch
import os

# --- Environment diagnostics ------------------------------------------------
# Report GPU, FlashAttention, and torch build info up front so failures in the
# heavyweight model load below are easier to diagnose.
if torch.cuda.is_available():
    print(f"Device name: {torch.cuda.get_device_properties('cuda').name}")
else:
    # Original crashed here on CPU-only hosts; degrade gracefully instead.
    print("Device name: CUDA not available (running on CPU)")
print(f"FlashAttention available: {torch.backends.cuda.flash_sdp_enabled()}")
# Fix: torch.version is a submodule (printing it shows a module repr);
# torch.__version__ is the actual version string.
print(f"torch version: {torch.__version__}")

# Instruction-tuned Llama-3 8B. Gated on the Hub: requires accepting Meta's
# license and an authenticated HF token in the environment.
model_id = "meta-llama/Meta-Llama-3-8B-Instruct"

pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},  # bf16 halves memory vs fp32
    device_map="auto",  # let accelerate place weights on available devices
)

# Chat history in the role/content format expected by apply_chat_template.
messages = [
    {
        "role": "system",
        "content": "You are a pirate chatbot who always responds in pirate speak!",
    },
    {"role": "user", "content": "Who are you?"},
]

# Render the chat into Llama-3's prompt format. add_generation_prompt=True
# appends the assistant header so the model continues as the assistant.
prompt = pipeline.tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# Llama-3 ends assistant turns with <|eot_id|> rather than only the plain EOS
# token, so both must be treated as stop tokens.
terminators = [
    pipeline.tokenizer.eos_token_id,
    pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>"),
]

outputs = pipeline(
    prompt,
    max_new_tokens=256,
    eos_token_id=terminators,
    do_sample=True,  # sample (temperature + nucleus) for varied pirate speak
    temperature=0.6,
    top_p=0.9,
)
# The pipeline returns prompt + completion; slice off the prompt text so only
# the model's reply is printed.
print(outputs[0]["generated_text"][len(prompt):])