import os
import subprocess

import gradio as gr
from ctransformers import AutoModelForCausalLM

MODEL = "unsloth.Q8_0.gguf"

# Download the GGUF weights once; skip if the file is already present.
if not os.path.exists(MODEL):
    print("Downloading model...")
    subprocess.run(
        [
            "wget", "-O", MODEL,
            "https://huggingface.co/Afifsudoers/NightPrompt_RV1_Instruct_8B_GGUF/resolve/main/unsloth.Q8_0.gguf?download=true",
        ],
        check=True,
    )

# ctransformers loads local GGUF files via from_pretrained, not the class
# constructor; `threads` (not `n_threads`) is its name for the CPU thread count.
llm = AutoModelForCausalLM.from_pretrained(
    MODEL,
    model_type="llama",
    threads=8,
    max_new_tokens=256,
)


def chat_fn(message, history):
    # Rebuild the conversation as a plain "User:/Assistant:" transcript and
    # stop generation as soon as the model starts writing the next turn.
    prompt = ""
    for user, assistant in history:
        prompt += f"User: {user}\nAssistant: {assistant}\n"
    prompt += f"User: {message}\nAssistant:"
    output = llm(prompt, stop=["User:", "Assistant:"])
    return output.strip()


demo = gr.ChatInterface(
    fn=chat_fn,
    title="NightPrompt RV1 Instruct 8B Q8_0",
    description="Chat with the NightPrompt RV1 Instruct 8B model quantized to Q8_0 in GGUF format.",
)

if __name__ == "__main__":
    demo.launch()
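
# Usage note (a sketch, not part of the app logic): running this script
# assumes the `gradio` and `ctransformers` packages are installed, e.g.
#   pip install gradio ctransformers
# Then launch it with `python app.py` (substitute this file's actual name;
# "app.py" is only a placeholder). Gradio serves the chat UI on
# http://127.0.0.1:7860 by default.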