saikiranmansa committed on
Commit
08a9d39
·
verified ·
1 Parent(s): 9964b88

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -8
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import streamlit as st
2
  import torch
3
- from transformers import LlamaTokenizer, AutoModelForCausalLM
4
  import os
5
  from huggingface_hub import login
6
 
@@ -14,17 +14,18 @@ if not hf_token:
14
  login(token=hf_token)
15
 
16
  # Load Model & Tokenizer
17
- model_name = "meta-llama/LLaMA-2-7b-chat-hf" # Use the chat version for better instruction-following
18
 
19
  @st.cache_resource
20
  def load_model():
21
  # Load tokenizer
22
- tokenizer = LlamaTokenizer.from_pretrained(model_name, token=hf_token)
23
 
24
- # Load model for causal language modeling
25
  model = AutoModelForCausalLM.from_pretrained(
26
  model_name,
27
- device_map="auto", # Automatically maps model to available devices
 
28
  token=hf_token
29
  )
30
 
@@ -56,8 +57,8 @@ def classify_text(text, classes):
56
  return predicted_class
57
 
58
  # Streamlit UI
59
- st.title("πŸ“ Text Classification with LLaMA 2")
60
- st.write("Powered by LLaMA 2 & Hugging Face")
61
 
62
  # User Input
63
  user_input = st.text_area("Enter the text to classify:")
@@ -77,4 +78,4 @@ if st.button("Classify"):
77
  st.warning("Please enter some text to classify.")
78
 
79
  st.markdown("---")
80
- st.write("πŸ” This app classifies text using the LLaMA 2 model.")
 
1
  import streamlit as st
2
  import torch
3
+ from transformers import AutoTokenizer, AutoModelForCausalLM
4
  import os
5
  from huggingface_hub import login
6
 
 
14
  login(token=hf_token)
15
 
16
  # Load Model & Tokenizer
17
+ model_name = "meta-llama/Llama-2-7b-chat-hf" # Use the chat model
18
 
19
  @st.cache_resource
20
  def load_model():
21
  # Load tokenizer
22
+ tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
23
 
24
+ # Load model with 4-bit quantization on CPU
25
  model = AutoModelForCausalLM.from_pretrained(
26
  model_name,
27
+ device_map="cpu", # Force CPU usage
28
+ load_in_4bit=True, # Enable 4-bit quantization
29
  token=hf_token
30
  )
31
 
 
57
  return predicted_class
58
 
59
  # Streamlit UI
60
+ st.title("πŸ“ Text Classification with LLaMA 2 Chat (CPU, 4-bit)")
61
+ st.write("Powered by LLaMA 2 Chat & Hugging Face")
62
 
63
  # User Input
64
  user_input = st.text_area("Enter the text to classify:")
 
78
  st.warning("Please enter some text to classify.")
79
 
80
  st.markdown("---")
81
+ st.write("πŸ” This app classifies text using the LLaMA 2 Chat model with 4-bit quantization on CPU.")