from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
import gradio as gr

model_name = "mistralai/Mistral-7B-v0.1"

# Load the tokenizer and the model; device_map="auto" places layers across
# available devices, and 4-bit loading requires the bitsandbytes package.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    load_in_4bit=True  # 4-bit quantization
)

# Text-generation pipeline on top of the quantized model
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)

def generate(text):
    # max_length counts prompt + generated tokens; sampling enabled
    outputs = pipe(text, max_length=100, do_sample=True)
    return outputs[0]["generated_text"]

# Minimal Gradio UI: one text input, one text output
demo = gr.Interface(fn=generate, inputs="text", outputs="text")
demo.launch()
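
# Note: a minimal alternative sketch, shown for reference only, assuming a newer
# transformers release where passing load_in_4bit directly to from_pretrained is
# deprecated. The same 4-bit setup can be expressed with BitsAndBytesConfig
# (bitsandbytes installed); it would replace the from_pretrained call above.
#
# import torch
# from transformers import BitsAndBytesConfig
#
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",             # 4-bit NormalFloat weights
#     bnb_4bit_compute_dtype=torch.float16,  # run matmuls in fp16
# )
# model = AutoModelForCausalLM.from_pretrained(
#     model_name,
#     device_map="auto",
#     quantization_config=bnb_config,
# )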