# Spaces: Runtime error (status banner captured with this listing)
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# Hub identifier of the checkpoint this app loads.
model_name = "NousResearch/Nous-Hermes-llama2-13b"

# float16 halves the memory footprint on a GPU, but many CPU kernels are not
# implemented (or are very slow) in half precision, so only use it when a CUDA
# device is actually available and fall back to float32 otherwise.
_model_dtype = torch.float16 if torch.cuda.is_available() else torch.float32

tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=_model_dtype,
    device_map="auto",
    # NOTE(review): trust_remote_code=True allows Python code shipped in the
    # model repository to run at load time. Kept to preserve the original
    # behavior; confirm it is really required for this checkpoint.
    trust_remote_code=True,
)

# Text-generation pipeline shared by every request handled by the app.
generator = pipeline("text-generation", model=model, tokenizer=tokenizer)
def chat(prompt):
    """Generate a sampled continuation of ``prompt``.

    Args:
        prompt: Text handed unchanged to the module-level ``generator``
            pipeline.

    Returns:
        The ``"generated_text"`` entry of the first result returned by the
        pipeline.
    """
    # Sampling configuration forwarded to the pipeline call.
    sampling_options = {
        "max_new_tokens": 512,
        "do_sample": True,
        "temperature": 0.8,
        "top_k": 60,
        "top_p": 0.95,
    }
    results = generator(prompt, **sampling_options)
    first_result = results[0]
    return first_result["generated_text"]
# Minimal UI: one text input wired to chat(), one text output for its result.
demo = gr.Interface(
    fn=chat,
    inputs="text",
    outputs="text",
)

# Start the Gradio app.
demo.launch()