Spaces:

saikiranmansa
/

LLaMA2_text_classification

Sleeping

saikiranmansa committed on Feb 22, 2025

Commit

5eecd78

verified ·

1 Parent(s): fa6a83c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -21,11 +21,11 @@ def load_model():
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
-    # Load model with 4-bit quantization on CPU
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         device_map="cpu",  # Force CPU usage
-        load_in_4bit=True,  # Enable 4-bit quantization
         token=hf_token
     )
@@ -57,7 +57,7 @@ def classify_text(text, classes):
     return predicted_class
 # Streamlit UI
-st.title("📝 Text Classification with LLaMA 2 Chat (CPU, 4-bit)")
 st.write("Powered by LLaMA 2 Chat & Hugging Face")
 # User Input
@@ -78,4 +78,4 @@ if st.button("Classify"):
         st.warning("Please enter some text to classify.")
 st.markdown("---")
-st.write("🔍 This app classifies text using the LLaMA 2 Chat model with 4-bit quantization on CPU.")

     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
+    # Load model with FP16 (half-precision) on CPU
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
         device_map="cpu",  # Force CPU usage
+        torch_dtype=torch.float16,  # Use FP16 to reduce memory usage
         token=hf_token
     )
     return predicted_class
 # Streamlit UI
+st.title("📝 Text Classification with LLaMA 2 Chat (CPU, FP16)")
 st.write("Powered by LLaMA 2 Chat & Hugging Face")
 # User Input
         st.warning("Please enter some text to classify.")
 st.markdown("---")
+st.write("🔍 This app classifies text using the LLaMA 2 Chat model with FP16 on CPU.")