Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| from pathlib import Path | |
| import gradio as gr | |
| import joblib | |
| import numpy as np | |
| from sentence_transformers import SentenceTransformer | |
| ROOT = Path(__file__).resolve().parent | |
| MODEL_PATH = ROOT / "models" / "best_model.joblib" | |
| LABEL_DETAILS = { | |
| "Safe": { | |
| "emoji": "🟢", | |
| "title": "Safe", | |
| "description": "No meaningful spoiler detected.", | |
| }, | |
| "Mild": { | |
| "emoji": "🟡", | |
| "title": "Mild Spoiler", | |
| "description": "Contains broad setup, tone, or non-critical plot information.", | |
| }, | |
| "Major": { | |
| "emoji": "🔴", | |
| "title": "Major Spoiler", | |
| "description": "May reveal a key twist, death, identity, ending, or outcome.", | |
| }, | |
| } | |
| def load_pipeline() -> tuple[object, SentenceTransformer, list[str]]: | |
| payload = joblib.load(MODEL_PATH) | |
| metadata = payload["metadata"] | |
| model = payload["model"] | |
| embedder = SentenceTransformer(metadata["embedding_model"]) | |
| return model, embedder, metadata["label_classes"] | |
| CLASSIFIER, EMBEDDER, LABEL_CLASSES = load_pipeline() | |
| def confidence_from_model(model: object, embedding: np.ndarray, label_id: int) -> float: | |
| if hasattr(model, "predict_proba"): | |
| probabilities = model.predict_proba(embedding)[0] | |
| return float(probabilities[label_id]) | |
| if hasattr(model, "decision_function"): | |
| scores = model.decision_function(embedding) | |
| if scores.ndim == 1: | |
| scores = scores.reshape(1, -1) | |
| shifted = scores[0] - np.max(scores[0]) | |
| probabilities = np.exp(shifted) / np.exp(shifted).sum() | |
| return float(probabilities[label_id]) | |
| return 0.0 | |
| def analyze_review(review: str) -> tuple[str, str, str]: | |
| text = review.strip() | |
| if not text: | |
| return "Paste a movie review first.", "", "" | |
| embedding = EMBEDDER.encode( | |
| [text], | |
| convert_to_numpy=True, | |
| normalize_embeddings=True, | |
| ) | |
| label_id = int(CLASSIFIER.predict(embedding)[0]) | |
| label = LABEL_CLASSES[label_id] | |
| confidence = confidence_from_model(CLASSIFIER, embedding, label_id) | |
| details = LABEL_DETAILS[label] | |
| result = f"{details['emoji']} {details['title']} ({confidence:.0%})" | |
| explanation = details["description"] | |
| original = text | |
| return result, explanation, original | |
| EXAMPLES = [ | |
| [ | |
| "The performances are excellent and the pacing is tense throughout, but I can recommend it without saying anything about the plot." | |
| ], | |
| [ | |
| "The second act has a tense confrontation that changes how the hero sees their mission, but the movie saves its biggest answers for later." | |
| ], | |
| [ | |
| "The final twist reveals that the hero's closest friend was secretly working for the villain the entire time." | |
| ], | |
| ] | |
| with gr.Blocks(title="Multi-Source Spoiler Detector") as demo: | |
| gr.Markdown("# Multi-Source Spoiler Detector") | |
| gr.Markdown("Classify a movie review as Safe, Mild Spoiler, or Major Spoiler.") | |
| with gr.Row(): | |
| review_input = gr.Textbox( | |
| label="Movie review", | |
| placeholder="Paste a movie review here...", | |
| lines=8, | |
| ) | |
| analyze_button = gr.Button("Analyze", variant="primary") | |
| result_output = gr.Textbox(label="Result", interactive=False) | |
| explanation_output = gr.Textbox(label="Why", interactive=False) | |
| with gr.Accordion("Original text", open=False): | |
| original_output = gr.Textbox(label="Original", lines=6, interactive=False) | |
| gr.Examples( | |
| examples=EXAMPLES, | |
| inputs=review_input, | |
| ) | |
| analyze_button.click( | |
| analyze_review, | |
| inputs=review_input, | |
| outputs=[result_output, explanation_output, original_output], | |
| ) | |
| if __name__ == "__main__": | |
| demo.launch() | |