Spaces:
Sleeping
Sleeping
| import gradio as gr # type: ignore | |
| import spaces # type: ignore | |
| import torch | |
| # Load model directly | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
# Checkpoint shared by the tokenizer and the model so the two cannot drift apart.
MODEL_ID = "microsoft/Phi-3-mini-4k-instruct"

# trust_remote_code=True allows the checkpoint repository's own Python code
# to be executed while loading.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True)

# Move the weights to the GPU once, at import time.
model.to("cuda")
def greet(name, sliderint):
    """Return the demo greeting for ``name`` followed by the slider value.

    Args:
        name: Text to greet; must be a ``str``.
        sliderint: Slider value, converted with ``str()`` and appended.

    Returns:
        The greeting string, e.g. ``"Hellonyaaaaa Bob!!5"``.
    """
    pieces = ("Hellonyaaaaa ", name, "!!", str(sliderint))
    return "".join(pieces)
# Jinja chat template in the Phi-3 instruct format: every turn is rendered as
# "<|role|>\n<content><|end|>\n" and generation is primed with "<|assistant|>\n".
# History built in this file labels replies with the role "model"; it is mapped
# to "assistant" here because "<|model|>" is not a tag the Phi-3 chat format
# uses.
chat_template = (
    "{% for message in messages %}"
    "{% set role = 'assistant' if message['role'] == 'model' else message['role'] %}"
    "{{ '<|' + role + '|>\n' + message['content'] + '<|end|>\n' }}"
    "{% endfor %}"
    "{% if add_generation_prompt %}"
    "{{ '<|assistant|>\n' }}"
    "{% endif %}"
)
# @spaces.GPU(duration=45)
def chatinterface_fn(message, history):
    """Generate the model's reply to ``message`` given the earlier chat turns.

    Args:
        message: The newest user message.
        history: Earlier turns as passed by ``gr.ChatInterface``. Both
            ``(user, reply)`` pairs and ``{"role": ..., "content": ...}``
            dicts are accepted.

    Returns:
        The newly generated reply text only; the prompt is not echoed back.
    """
    prompt = []
    for turn in history:
        if isinstance(turn, dict):
            # Dict-style history: unpacking it as a pair would silently yield
            # the dict's keys instead of the messages. Relabel "assistant" as
            # "model" so both history formats produce the same roles.
            role = "model" if turn["role"] == "assistant" else turn["role"]
            prompt.append({"role": role, "content": turn["content"]})
            continue
        human, assistant = turn
        # Skip missing halves of a pair (e.g. a reply that is still pending).
        if human is not None:
            prompt.append({"role": "user", "content": human})
        if assistant is not None:
            prompt.append({"role": "model", "content": assistant})
    prompt.append({"role": "user", "content": message})

    token_ids = tokenizer.apply_chat_template(
        prompt,
        tokenize=True,
        add_generation_prompt=True,
        chat_template=chat_template,
        return_tensors="pt",
    )
    print("token_ids:", token_ids)  # added for debugging

    output_ids = model.generate(
        token_ids.to(model.device),
        temperature=0.1,
        do_sample=True,
        top_p=0.95,
        top_k=40,
        max_new_tokens=256,
    )

    # generate() returns the prompt followed by the continuation; decode only
    # the continuation so the chat reply does not repeat the conversation.
    prompt_length = token_ids.shape[-1]
    text = tokenizer.decode(output_ids[0][prompt_length:], skip_special_tokens=True)
    print(text)
    return text
def infer(message: str) -> str:
    """Continue ``message`` using the model's default generation settings.

    Args:
        message: Text to continue. When empty, the fixed prompt
            ``"hello, this is"`` is used instead.

    Returns:
        The decoded output of ``model.generate`` (special tokens removed).
    """
    # Previously the argument was ignored and the fixed prompt was always
    # used; keep that prompt only as the fallback for empty input.
    prompt = message or "hello, this is"
    input_ids = tokenizer.encode(
        prompt, add_special_tokens=False, return_tensors="pt"
    ).to(model.device)
    print(model.device)
    outputs = model.generate(input_ids)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Page layout, top to bottom: a stand-alone "name" text field (no handler is
# attached to it in this block), the greet form, the chat UI, and the
# plain-completion form.
with gr.Blocks() as demo:
    name = gr.Textbox(label="name")
    output = gr.Interface(fn=greet, inputs=["text", "slider"], outputs="text")
    a = gr.ChatInterface(chatinterface_fn, title="microsoft/Phi-3-mini-4k-instruct")
    b = gr.Interface(fn=infer, inputs="text", outputs="text")

# Start the server only when executed as a script, so importing this module
# (e.g. from tooling that looks up `demo`) does not launch it as a side effect.
if __name__ == "__main__":
    demo.launch()