bnb / app.py
kanneboinakumar's picture
Upload 3 files
48f8659 verified
raw
history blame contribute delete
831 Bytes
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
@st.cache_resource
def load_model():
    """Load the quantized tokenizer and model from the local "quantized_model" directory.

    Decorated with ``st.cache_resource`` so the (expensive) model load runs
    only once per server process — Streamlit reruns this entire script on
    every widget interaction, and without caching the model would be
    reloaded from disk on each button click.

    Returns:
        tuple: ``(tokenizer, model)`` — the ``AutoTokenizer`` and the
        ``AutoModelForCausalLM`` loaded in bfloat16 with automatic device
        placement (``device_map="auto"``).
    """
    tokenizer = AutoTokenizer.from_pretrained("quantized_model")
    model = AutoModelForCausalLM.from_pretrained(
        "quantized_model",
        device_map="auto",
        torch_dtype=torch.bfloat16,
    )
    return tokenizer, model
# Load once (cached inside load_model if caching is enabled there); Streamlit
# reruns this whole script on every interaction.
tokenizer, model = load_model()

st.title("Quantized Model Inference")

user_input = st.text_input("Enter your prompt:")

if st.button("Generate"):
    if user_input:
        # Move inputs to whatever device the model actually landed on under
        # device_map="auto". Hard-coding .to("cuda") crashes on CPU-only
        # hosts and can mismatch the auto-chosen placement.
        inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
        outputs = model.generate(**inputs)
        # outputs[0]: first (and only) generated sequence in the batch.
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.write(f"Response: {response}")
    else:
        st.write("Please enter a prompt.")