saikiranmansa committed on
Commit
5eecd78
·
verified ·
1 Parent(s): fa6a83c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -21,11 +21,11 @@ def load_model():
21
  # Load tokenizer
22
  tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
23
 
24
- # Load model with 4-bit quantization on CPU
25
  model = AutoModelForCausalLM.from_pretrained(
26
  model_name,
27
  device_map="cpu", # Force CPU usage
28
- load_in_4bit=True, # Enable 4-bit quantization
29
  token=hf_token
30
  )
31
 
@@ -57,7 +57,7 @@ def classify_text(text, classes):
57
  return predicted_class
58
 
59
  # Streamlit UI
60
- st.title("📝 Text Classification with LLaMA 2 Chat (CPU, 4-bit)")
61
  st.write("Powered by LLaMA 2 Chat & Hugging Face")
62
 
63
  # User Input
@@ -78,4 +78,4 @@ if st.button("Classify"):
78
  st.warning("Please enter some text to classify.")
79
 
80
  st.markdown("---")
81
- st.write("🔍 This app classifies text using the LLaMA 2 Chat model with 4-bit quantization on CPU.")
 
21
  # Load tokenizer
22
  tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
23
 
24
+ # Load model with FP16 (half-precision) on CPU
25
  model = AutoModelForCausalLM.from_pretrained(
26
  model_name,
27
  device_map="cpu", # Force CPU usage
28
+ torch_dtype=torch.float16, # Use FP16 to reduce memory usage
29
  token=hf_token
30
  )
31
 
 
57
  return predicted_class
58
 
59
  # Streamlit UI
60
+ st.title("📝 Text Classification with LLaMA 2 Chat (CPU, FP16)")
61
  st.write("Powered by LLaMA 2 Chat & Hugging Face")
62
 
63
  # User Input
 
78
  st.warning("Please enter some text to classify.")
79
 
80
  st.markdown("---")
81
+ st.write("🔍 This app classifies text using the LLaMA 2 Chat model with FP16 on CPU.")