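"""Gradio chat Space serving google/gemma-2-9b-it.

The model is loaded once at startup in init() and exposed through a
gr.ChatInterface; generation itself runs inside a @spaces.GPU-decorated
function so it can use ZeroGPU hardware.
"""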
import spaces
import os
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import gradio as gr

# module-level pipeline handle, populated by init()
text_generator = None

def init():
    global text_generator  # without this, the assignment below would create a local and leave the module-level handle as None
    huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
    if not huggingface_token:
        print("HUGGINGFACE_TOKEN is not set; add it as a Space secret if the model requires authentication")
        #raise ValueError("HUGGINGFACE_TOKEN environment variable is not set")
model_id = "google/gemma-2-9b-it"
    device = "auto"  # let device_map handle placement; alternatively torch.device("cuda" if torch.cuda.is_available() else "cpu")
dtype = torch.bfloat16
tokenizer = AutoTokenizer.from_pretrained(model_id, token=huggingface_token)
    print(model_id, device, dtype)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, token=huggingface_token, torch_dtype=dtype, device_map=device
    )
    # device placement is handled by device_map; pipeline objects have no .to(device)
    text_generator = pipeline("text-generation", model=model, tokenizer=tokenizer, torch_dtype=dtype, device_map=device)
if next(model.parameters()).is_cuda:
print("The model is on a GPU")
else:
print("The model is on a CPU")
    if text_generator.device.type == "cuda":  # .device is a torch.device, so compare its .type rather than a string
print("The pipeline is using a GPU")
else:
print("The pipeline is using a CPU")
print("initialized")
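
# On ZeroGPU Spaces, @spaces.GPU borrows a GPU for each call to the decorated
# function, for at most `duration` seconds per call.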
@spaces.GPU(duration=120)
def generate_text(messages):
    # the model and pipeline are loaded once in init() and reused here;
    # a list of {"role": ..., "content": ...} dicts runs the pipeline in chat mode
    result = text_generator(messages, max_new_tokens=256, do_sample=True, temperature=0.7)
generated_output = result[0]["generated_text"]
    if isinstance(generated_output, list):
        # chat mode returns the whole conversation; take the last assistant turn
        for message in reversed(generated_output):
            if message.get("role") == "assistant":
                return message.get("content", "No content found.")
return "No assistant response found."
else:
return "Unexpected output format."

def call_generate_text(message, history):
    # with type="messages", Gradio already passes history as {"role", "content"} dicts
    print(message)
    print(history)
    messages = history + [{"role": "user", "content": message}]
try:
        text = generate_text(messages)
        return text
    except RuntimeError as e:
        print(f"Generation failed with a runtime error: {e}")
        return ""
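
# type="messages" makes the ChatInterface pass and expect OpenAI-style
# {"role": ..., "content": ...} dicts instead of (user, bot) tuples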
demo = gr.ChatInterface(call_generate_text, type="messages")

if __name__ == "__main__":
init()
demo.launch(share=True)