Update app.py
app.py CHANGED
@@ -1,64 +1,69 @@
 # %%writefile deployment_files/app.py
 import streamlit as st
-# REMOVED: import os (we still need it for safety later, but the primary ENV setting goes to Docker)
 import time
+# REMOVED: import os (Not needed as it's in Dockerfile)
 from transformers import T5Tokenizer, TFT5ForConditionalGeneration
 
-# ---
-
-# --- Configuration ---
+# --- Configuration (Unchanged) ---
 MODEL_NAME = "google/flan-t5-small"
 
 # -------------------- Model Logic --------------------
 
-
-
-"""
+# CRITICAL FIX: Simplified and highly directive prompt for the smallest model
+sys_prompt = "Classify the sentiment of the following customer review as either 'positive', 'negative', or 'neutral'. Respond with only one word."
 
 @st.cache_resource
 def load_llm():
-
-    # Since we removed torch, device is always 'cpu' conceptually
+    # ... (load_llm function remains identical) ...
     device = "CPU (TensorFlow)"
-
     try:
         with st.spinner(f"Loading tokenizer and model ({MODEL_NAME}) on {device}..."):
             st.info(f"Using device: **{device}**. Starting model download...")
-
             start_time = time.time()
             tokenizer = T5Tokenizer.from_pretrained(MODEL_NAME)
-
-            # CRUCIAL CHANGE: Load the TensorFlow version of the model
             model = TFT5ForConditionalGeneration.from_pretrained(MODEL_NAME)
-
             end_time = time.time()
             st.success(f"Model {MODEL_NAME} loaded successfully in {end_time - start_time:.2f} seconds!")
-
             return tokenizer, model, device
-
     except Exception as e:
         st.error(f"FATAL ERROR LOADING MODEL: {e}")
-        st.info("Model load failed.
+        st.info("Model load failed.")
         return None, None, None
-# ... (rest of app.py functions and UI logic are identical) ...
 
 def llm_response(tokenizer, model, device, prompt):
     if tokenizer is None or model is None:
         return "Model not initialized due to previous error."
 
-    # Process input (TensorFlow models automatically handle device placement on CPU)
-    # TF models use the 'tf' argument for return tensors
     input_ids = tokenizer(prompt, return_tensors="tf").input_ids
-
-    # Generate response
-    outputs = model.generate(input_ids, max_length=150, do_sample=False)
-
-    # Decode and clean the output
+    # max_length=3 caps the decoder output (the limit includes the decoder start token), enough for a one-word label
+    outputs = model.generate(input_ids, max_length=3, do_sample=False)
 
     return tokenizer.decode(outputs[0], skip_special_tokens=True).strip()
 
 def predict_review_sentiment(tokenizer, model, device, review):
-
-
+    """
+    CLEANED PROMPT FORMATTING.
+    The final prompt sent to the model is simple:
+    "Classify the sentiment... Respond with only one word. Review: {review text}"
+    """
+    # FIX: Combine the strict system prompt and the review text clearly
+    full_prompt = f"{sys_prompt} Review: '{review}'"
+
+    # Run the prediction and convert the output to standard casing
+    response = llm_response(tokenizer, model, device, full_prompt)
+
+    # Attempt to normalize the model's output to the three categories
+    normalized_response = response.lower().strip()
+
+    if "positive" in normalized_response:
+        return "Positive"
+    elif "negative" in normalized_response:
+        return "Negative"
+    elif "neutral" in normalized_response:
+        return "Neutral"
+    else:
+        # For non-classification outputs like 'hi', return the raw response
+        return response
 
 
 # -------------------- Streamlit UI --------------------
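
A note on the new max_length=3 cap in llm_response: in Hugging Face generate, max_length bounds the whole decoder sequence, including the decoder start token, so it leaves room for only about two generated tokens. A quick sketch for checking that the three target labels fit in that budget (assuming the same google/flan-t5-small tokenizer loaded above):

# Sanity check (sketch): each label should encode to one SentencePiece token plus </s>
from transformers import T5Tokenizer

tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
for label in ("positive", "negative", "neutral"):
    ids = tokenizer(label).input_ids
    print(label, len(ids), ids)  # expect len(ids) == 2 for single-token labels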
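
The if/elif chain in predict_review_sentiment is case-insensitive substring matching, so it also tolerates outputs like "Positive." or "the sentiment is negative". A standalone sketch of that normalization logic (pure Python, no model required; the helper name normalize_sentiment is chosen here for illustration):

def normalize_sentiment(response: str) -> str:
    # Map a free-form model response onto the three labels, else pass it through
    normalized = response.lower().strip()
    if "positive" in normalized:
        return "Positive"
    elif "negative" in normalized:
        return "Negative"
    elif "neutral" in normalized:
        return "Neutral"
    # Non-classification outputs (e.g. a greeting) fall through unchanged
    return response

assert normalize_sentiment("Positive.") == "Positive"
assert normalize_sentiment("the sentiment is negative") == "Negative"
assert normalize_sentiment("hi") == "hi"

One caveat of substring matching: a response like "not positive" would still map to "Positive". The highly directive one-word prompt makes that unlikely, but it is worth keeping in mind.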
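
The Streamlit UI section itself is outside this diff; a minimal sketch of how the updated predict_review_sentiment could be wired into it (widget labels are illustrative, not taken from the commit):

# Hypothetical UI wiring (sketch only; the real UI section is not shown in this diff)
tokenizer, model, device = load_llm()

review = st.text_area("Paste a customer review:")
if st.button("Classify sentiment") and review:
    sentiment = predict_review_sentiment(tokenizer, model, device, review)
    st.write(f"Predicted sentiment: {sentiment}")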