Hugging Face Space status: Runtime error
import os
import json
import numpy as np
import pandas as pd
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Quantized Llama-2 13B checkpoint (GGML, q2_K) published by TheBloke.
REPO_ID = 'TheBloke/h2ogpt-4096-llama2-13B-GGML'
MODEL_FILE = 'h2ogpt-4096-llama2-13b.ggmlv3.q2_K.bin'

# Fetch the weights from the Hub (cached locally after the first download).
hub_model_path = hf_hub_download(repo_id=REPO_ID, filename=MODEL_FILE)

# Load the model with a deliberately small context window.
model = Llama(
    model_path=hub_model_path,
    n_ctx=220,        # Maximum context size. TODO: Increase this later.
    use_mlock=True,   # Force the system to keep the model in RAM.
    seed=77,          # Fixed seed for reproducible sampling.
    n_batch=64,       # Prompt-processing batch size.
)
def generate(prompt):
    """Run the LLM on *prompt* and return the raw completion as pretty JSON.

    The call echoes the prompt back in the output (echo=True), caps the
    completion at 64 tokens, and stops early on 'Q:' or a newline.
    """
    completion = model(
        prompt,
        max_tokens=64,
        stop=['Q:', '\n'],
        echo=True,
    )
    return json.dumps(completion, indent=4)
# Simple text-in / text-out web UI wired to the generator above.
iface = gr.Interface(
    fn=generate,
    inputs='text',
    outputs='text',
)
iface.launch()