saikiranmansa commited on
Commit
1fdcf87
·
verified ·
1 Parent(s): 6ce2576

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -10
app.py CHANGED
@@ -1,4 +1,4 @@
1
- import streamlit as st
2
  import torch
3
  from transformers import LlamaTokenizer, AutoModelForSequenceClassification
4
  import os
@@ -14,35 +14,38 @@ if not hf_token:
14
  login(token=hf_token)
15
 
16
  # Load Model & Tokenizer
17
- model_name = "meta-llama/Llama-2-7b" # Ensure this is a fine-tuned classification model
18
 
19
  @st.cache_resource
20
  def load_model():
 
21
  tokenizer = LlamaTokenizer.from_pretrained(model_name, token=hf_token)
22
- model = AutoModelForSequenceClassification.from_pretrained(model_name, token=hf_token)
 
 
 
 
 
 
23
 
24
  # Move model to GPU if available
25
  device = "cuda" if torch.cuda.is_available() else "cpu"
26
  model.to(device)
27
 
28
- # Convert model to 8-bit
29
- model.half() # Change to 16-bit for FP16, if necessary
30
- model.eval() # Set the model to evaluation mode
31
-
32
  return tokenizer, model, device
33
 
34
  tokenizer, model, device = load_model()
35
 
36
  # Define class labels (Update based on your dataset)
37
- class_labels = ["Negative", "Neutral", "Positive"]
38
 
39
  # Function to classify text
40
  def classify_text(user_input):
41
  inputs = tokenizer(user_input, return_tensors="pt", truncation=True, padding=True).to(device)
42
-
43
  with torch.no_grad():
44
  outputs = model(**inputs)
45
-
46
  logits = outputs.logits
47
  probabilities = torch.nn.functional.softmax(logits, dim=-1)
48
  predicted_class_idx = torch.argmax(probabilities, dim=-1).item()
@@ -69,3 +72,4 @@ if st.button("Classify"):
69
 
70
  st.markdown("---")
71
  st.write("🔍 This app classifies text using a fine-tuned LLaMA 2 model.")
 
 
1
+ import streamlit as st
2
  import torch
3
  from transformers import LlamaTokenizer, AutoModelForSequenceClassification
4
  import os
 
14
  login(token=hf_token)
15
 
16
  # Load Model & Tokenizer
17
+ model_name = "meta-llama/Llama-2-7b"
18
 
19
  @st.cache_resource
20
  def load_model():
21
+ # Load tokenizer
22
  tokenizer = LlamaTokenizer.from_pretrained(model_name, token=hf_token)
23
+
24
+ # Load model with 8-bit quantization
25
+ model = AutoModelForSequenceClassification.from_pretrained(
26
+ model_name,
27
+ quantization_config=BitsAndBytesConfig(load_in_8bit=True),  # requires bitsandbytes; use load_in_4bit=True for 4-bit (import BitsAndBytesConfig from transformers)
28
+ token=hf_token
29
+ )
30
 
31
  # Move model to GPU if available
32
  device = "cuda" if torch.cuda.is_available() else "cpu"
33
  model.to(device)  # NOTE(review): .to() raises on 8-bit bitsandbytes-quantized models — pass device_map="auto" to from_pretrained instead
34
 
 
 
 
 
35
  return tokenizer, model, device
36
 
37
  tokenizer, model, device = load_model()
38
 
39
  # Define class labels (Update based on your dataset)
40
+ class_labels = ["Negative", "Neutral", "Positive"] # Modify if your model has different classes
41
 
42
  # Function to classify text
43
  def classify_text(user_input):
44
  inputs = tokenizer(user_input, return_tensors="pt", truncation=True, padding=True).to(device)
45
+
46
  with torch.no_grad():
47
  outputs = model(**inputs)
48
+
49
  logits = outputs.logits
50
  probabilities = torch.nn.functional.softmax(logits, dim=-1)
51
  predicted_class_idx = torch.argmax(probabilities, dim=-1).item()
 
72
 
73
  st.markdown("---")
74
  st.write("🔍 This app classifies text using a fine-tuned LLaMA 2 model.")
75
+