Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from ctransformers import AutoModelForCausalLM | |
# Pick the checkpoint to serve; exactly one assignment stays uncommented.
# model_id = "TheBloke/Llama-2-7B-GGML"
model_id = "TheBloke/Llama-2-7B-chat-GGML"
# model_id = "TheBloke/Llama-2-13B-GGML"
# model_id = "TheBloke/Llama-2-13B-chat-GGML"

# Value for the loader's gpu_layers argument: 130 for 13B checkpoints,
# 110 for everything else.
if "13B" in model_id:
    gpu_layers = 130
else:
    gpu_layers = 110

# Generation settings, expanded into from_pretrained as keyword arguments.
config = dict(
    max_new_tokens=256,
    repetition_penalty=1.1,
    temperature=0.1,
    stream=True,
)

# Build the model instance once at module load; predict() reuses it.
llm = AutoModelForCausalLM.from_pretrained(
    model_id,
    model_type="llama",
    # lib="avx2",  # for CPU use
    gpu_layers=gpu_layers,
    **config,
)
def predict(prompt):
    """Run ``prompt`` through the module-level ``llm`` and return its output.

    The call passes ``stream=False`` explicitly, overriding the ``stream``
    entry that ``config`` supplied when the model was loaded.

    Args:
        prompt: Text forwarded unchanged to the model; no system prompt or
            chat template is added here.

    Returns:
        Whatever ``llm`` returns for a non-streaming call -- presumably the
        generated text as a single string (confirm against the ctransformers
        API).
    """
    return llm(prompt, stream=False)
# Minimal Gradio UI: one text input wired to predict(), one text output.
demo = gr.Interface(fn=predict, inputs="text", outputs="text")

# Start the web app as soon as the script runs.
demo.launch()