leoole's picture
Upload app.py
4ba72c4 verified
from __future__ import annotations
from pathlib import Path
import gradio as gr
import joblib
import numpy as np
from sentence_transformers import SentenceTransformer
ROOT = Path(__file__).resolve().parent
MODEL_PATH = ROOT / "models" / "best_model.joblib"
LABEL_DETAILS = {
"Safe": {
"emoji": "🟢",
"title": "Safe",
"description": "No meaningful spoiler detected.",
},
"Mild": {
"emoji": "🟡",
"title": "Mild Spoiler",
"description": "Contains broad setup, tone, or non-critical plot information.",
},
"Major": {
"emoji": "🔴",
"title": "Major Spoiler",
"description": "May reveal a key twist, death, identity, ending, or outcome.",
},
}
def load_pipeline() -> tuple[object, SentenceTransformer, list[str]]:
payload = joblib.load(MODEL_PATH)
metadata = payload["metadata"]
model = payload["model"]
embedder = SentenceTransformer(metadata["embedding_model"])
return model, embedder, metadata["label_classes"]
CLASSIFIER, EMBEDDER, LABEL_CLASSES = load_pipeline()
def confidence_from_model(model: object, embedding: np.ndarray, label_id: int) -> float:
if hasattr(model, "predict_proba"):
probabilities = model.predict_proba(embedding)[0]
return float(probabilities[label_id])
if hasattr(model, "decision_function"):
scores = model.decision_function(embedding)
if scores.ndim == 1:
scores = scores.reshape(1, -1)
shifted = scores[0] - np.max(scores[0])
probabilities = np.exp(shifted) / np.exp(shifted).sum()
return float(probabilities[label_id])
return 0.0
def analyze_review(review: str) -> tuple[str, str, str]:
text = review.strip()
if not text:
return "Paste a movie review first.", "", ""
embedding = EMBEDDER.encode(
[text],
convert_to_numpy=True,
normalize_embeddings=True,
)
label_id = int(CLASSIFIER.predict(embedding)[0])
label = LABEL_CLASSES[label_id]
confidence = confidence_from_model(CLASSIFIER, embedding, label_id)
details = LABEL_DETAILS[label]
result = f"{details['emoji']} {details['title']} ({confidence:.0%})"
explanation = details["description"]
original = text
return result, explanation, original
EXAMPLES = [
[
"The performances are excellent and the pacing is tense throughout, but I can recommend it without saying anything about the plot."
],
[
"The second act has a tense confrontation that changes how the hero sees their mission, but the movie saves its biggest answers for later."
],
[
"The final twist reveals that the hero's closest friend was secretly working for the villain the entire time."
],
]
with gr.Blocks(title="Multi-Source Spoiler Detector") as demo:
gr.Markdown("# Multi-Source Spoiler Detector")
gr.Markdown("Classify a movie review as Safe, Mild Spoiler, or Major Spoiler.")
with gr.Row():
review_input = gr.Textbox(
label="Movie review",
placeholder="Paste a movie review here...",
lines=8,
)
analyze_button = gr.Button("Analyze", variant="primary")
result_output = gr.Textbox(label="Result", interactive=False)
explanation_output = gr.Textbox(label="Why", interactive=False)
with gr.Accordion("Original text", open=False):
original_output = gr.Textbox(label="Original", lines=6, interactive=False)
gr.Examples(
examples=EXAMPLES,
inputs=review_input,
)
analyze_button.click(
analyze_review,
inputs=review_input,
outputs=[result_output, explanation_output, original_output],
)
if __name__ == "__main__":
demo.launch()