saikiranmansa committed on
Commit
5e851f5
·
verified ·
1 Parent(s): 6389890

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -1,8 +1,8 @@
1
  import streamlit as st
2
  import torch
3
  from transformers import LlamaTokenizer, AutoModelForSequenceClassification
4
- import os
5
  from huggingface_hub import login
 
6
 
7
  # Hugging Face Authentication
8
  hf_token = os.getenv("HUGGINGFACE_TOKEN", "").strip()
@@ -21,10 +21,11 @@ def load_model():
21
  # Load tokenizer
22
  tokenizer = LlamaTokenizer.from_pretrained(model_name, token=hf_token)
23
 
24
- # Load model with 8-bit quantization
25
  model = AutoModelForSequenceClassification.from_pretrained(
26
  model_name,
27
- quantization_config="8bit", # Use "4bit" for 4-bit quantization
 
28
  token=hf_token
29
  )
30
 
@@ -72,4 +73,3 @@ if st.button("Classify"):
72
 
73
  st.markdown("---")
74
  st.write("🔍 This app classifies text using a fine-tuned LLaMA 2 model.")
75
-
 
1
  import streamlit as st
2
  import torch
3
  from transformers import LlamaTokenizer, AutoModelForSequenceClassification
 
4
  from huggingface_hub import login
5
+ import os
6
 
7
  # Hugging Face Authentication
8
  hf_token = os.getenv("HUGGINGFACE_TOKEN", "").strip()
 
21
  # Load tokenizer
22
  tokenizer = LlamaTokenizer.from_pretrained(model_name, token=hf_token)
23
 
24
+ # Load model with bitsandbytes for 8-bit quantization
25
  model = AutoModelForSequenceClassification.from_pretrained(
26
  model_name,
27
+ load_in_8bit=True, # Set this for 8-bit quantization
28
+ device_map="auto", # Automatically maps model to available devices
29
  token=hf_token
30
  )
31
 
 
73
 
74
  st.markdown("---")
75
  st.write("🔍 This app classifies text using a fine-tuned LLaMA 2 model.")