# Hugging Face Space (status: Running on Zero). File size: 1,506 bytes.
import spaces
import torch
from threading import Thread
from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
import gradio as gr
# Hub repository id shared by the tokenizer and the model.
MODEL_ID = "NoesisLab/Spartacus-1B-Instruct"

# Both objects are created once at import time and reused by every request.
# NOTE: trust_remote_code=True runs Python code shipped in the model
# repository; only keep it for repositories you trust.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,  # same id as the tokenizer, so the two cannot drift apart
    trust_remote_code=True,
    tie_word_embeddings=False,  # try to force-disable the weight-tying check
)
@spaces.GPU
def respond(message, history):
    """Stream the assistant's reply to ``message`` given the prior ``history``.

    Args:
        message: The latest user message.
        history: Earlier conversation turns. Each entry is either a
            ``{"role": ..., "content": ...}`` dict or a legacy
            ``(user_text, assistant_text)`` pair.

    Yields:
        The reply text accumulated so far; each yielded value extends the
        previous one with the newly generated piece.
    """
    # Cap on generated tokens. Without an explicit cap, generate() falls back
    # to the generation config's default length limit, which can cut replies
    # short.
    max_new_tokens = 512

    messages = [{"role": "system", "content": "You are Spartacus, a helpful assistant."}]
    for msg in history:
        if isinstance(msg, dict):
            messages.append({"role": msg["role"], "content": msg["content"]})
            continue
        # Legacy pair format: (user_text, assistant_text); either side may be
        # empty/None, in which case it is skipped.
        user_text, assistant_text = msg
        if user_text:
            messages.append({"role": "user", "content": user_text})
        if assistant_text:
            messages.append({"role": "assistant", "content": assistant_text})
    messages.append({"role": "user", "content": message})

    # return_dict=True yields both input_ids and attention_mask, so generate()
    # does not have to guess the mask.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        temperature=0.5,
        top_p=0.9,
        do_sample=True,
    )

    # generate() blocks until it finishes, so it runs in a worker thread while
    # this generator drains the streamer.
    thread = Thread(target=model.generate, kwargs=generate_kwargs)
    thread.start()

    response = ""
    for token in streamer:
        response += token
        yield response

    # Make sure the worker has fully finished before the function returns.
    thread.join()
# Chat UI that forwards each user turn to respond() and renders the streamed
# partial replies it yields.
demo = gr.ChatInterface(
    fn=respond,
    title="Spartacus Chat",
    description="Chat with NoesisLab/Spartacus-1B-Instruct",
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()