YAMITEK committed on
Commit
5b927ab
·
verified ·
1 Parent(s): ca8f389

Upload 2 files

Browse files
Files changed (2) hide show
  1. app (1).py +27 -0
  2. requirements (1).txt +3 -0
app (1).py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import AutoModelForCausalLM, AutoTokenizer
3
+ import torch
4
+
5
@st.cache_resource
def load_model():
    """Load the tokenizer and causal language model from ``quantized_model``.

    Streamlit re-executes the whole script on every widget interaction, so
    the loader is wrapped in ``st.cache_resource``: the tokenizer and model
    are created once per server process and reused across reruns and
    sessions instead of being reloaded on each button click.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    tokenizer = AutoTokenizer.from_pretrained("quantized_model")
    model = AutoModelForCausalLM.from_pretrained(
        "quantized_model",
        # NOTE: `device_map` relies on the `accelerate` package being
        # installed alongside transformers.
        device_map="auto",
        torch_dtype=torch.bfloat16,
    )
    return tokenizer, model
13
+
14
# Obtain the tokenizer and model used for every generation request.
tokenizer, model = load_model()

st.title("Quantized Model Inference")

user_input = st.text_input("Enter your prompt:")

if st.button("Generate"):
    if user_input:
        # The model is placed with device_map="auto", so it is not guaranteed
        # to live on "cuda" (e.g. CPU-only hosts). Send the inputs to the
        # device the model reports instead of a hard-coded one.
        inputs = tokenizer(user_input, return_tensors="pt").to(model.device)
        outputs = model.generate(**inputs)
        # generate() returns a batch; the single prompt's sequence is index 0.
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.write(f"Response: {response}")
    else:
        st.write("Please enter a prompt.")
requirements (1).txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ streamlit
2
+ transformers
3
+ torch