Update app.py
app.py CHANGED
@@ -1,4 +1,4 @@
 import streamlit as st
 import torch
-from transformers import LlamaTokenizer, AutoModelForSequenceClassification
+from transformers import LlamaTokenizer, AutoModelForSequenceClassification, BitsAndBytesConfig
 import os

@@ -14,35 +14,38 @@ if not hf_token:
 login(token=hf_token)

 # Load Model & Tokenizer
 model_name = "meta-llama/Llama-2-7b"

 @st.cache_resource
 def load_model():
+    # Load tokenizer
     tokenizer = LlamaTokenizer.from_pretrained(model_name, token=hf_token)
-    model = AutoModelForSequenceClassification.from_pretrained(model_name, token=hf_token)

-    # Move model to GPU if available
-    device = "cuda" if torch.cuda.is_available() else "cpu"
-    model.to(device)
+    # Load model with 8-bit quantization (requires the bitsandbytes package)
+    model = AutoModelForSequenceClassification.from_pretrained(
+        model_name,
+        quantization_config=BitsAndBytesConfig(load_in_8bit=True),  # load_in_4bit=True for 4-bit
+        device_map="auto",  # places the quantized weights on the available GPU
+        token=hf_token
+    )

-    # Convert model to 8-bit
-    model.half()  # Change to 16-bit for FP16, if necessary
-    model.eval()  # Set the model to evaluation mode
+    # A quantized model cannot be moved with .to(); read its device instead
+    device = model.device
+    model.eval()  # Set the model to evaluation mode

     return tokenizer, model, device

 tokenizer, model, device = load_model()

 # Define class labels (Update based on your dataset)
-class_labels = ["Negative", "Neutral", "Positive"]
+class_labels = ["Negative", "Neutral", "Positive"]  # Modify if your model has different classes

 # Function to classify text
 def classify_text(user_input):
     inputs = tokenizer(user_input, return_tensors="pt", truncation=True, padding=True).to(device)

     with torch.no_grad():
         outputs = model(**inputs)

     logits = outputs.logits
     probabilities = torch.nn.functional.softmax(logits, dim=-1)
     predicted_class_idx = torch.argmax(probabilities, dim=-1).item()

@@ -69,3 +72,4 @@ if st.button("Classify"):

 st.markdown("---")
 st.write("🔍 This app classifies text using a fine-tuned LLaMA 2 model.")
+
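Note on the quantization change: transformers does not accept the string "8bit" as a quantization_config value, so the added lines as originally committed would fail at load time; the hunk above swaps in a BitsAndBytesConfig object and drops the model.to(device) call, which raises an error on an 8-bit model. A minimal standalone sketch of that loading pattern, assuming bitsandbytes and accelerate are installed and a CUDA GPU is available:

from transformers import AutoModelForSequenceClassification, BitsAndBytesConfig

# 8-bit quantization; use BitsAndBytesConfig(load_in_4bit=True) for 4-bit instead
quant_config = BitsAndBytesConfig(load_in_8bit=True)

model = AutoModelForSequenceClassification.from_pretrained(
    "meta-llama/Llama-2-7b",   # gated repo: an access token may be required
    quantization_config=quant_config,
    device_map="auto",         # accelerate assigns the quantized weights to the GPU
)

# Rough sanity check: a 7B model needs ~13 GB in FP16, so the
# 8-bit footprint should come out far smaller
print(f"Memory footprint: {model.get_memory_footprint() / 1e9:.1f} GB")

Passing load_in_8bit=True directly to from_pretrained also works on older transformers versions, but the config object is the current interface.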
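The second hunk cuts off classify_text right after predicted_class_idx is computed. Presumably the function goes on to map that index back to class_labels; a sketch of one plausible continuation (the label-plus-confidence return shape is an assumption, not something shown in the diff):

# Assumes tokenizer, model, device and class_labels defined earlier in app.py
def classify_text(user_input):
    inputs = tokenizer(user_input, return_tensors="pt", truncation=True, padding=True).to(device)

    with torch.no_grad():
        outputs = model(**inputs)

    probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)
    predicted_class_idx = torch.argmax(probabilities, dim=-1).item()

    # Hypothetical continuation: resolve the index to a label plus confidence
    label = class_labels[predicted_class_idx]
    confidence = probabilities[0, predicted_class_idx].item()
    return label, confidence

# Example use in the Streamlit handler:
#   label, confidence = classify_text(user_input)
#   st.write(f"Prediction: {label} ({confidence:.1%})")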