Spaces:

leoole
/

spoiler-detector

Sleeping

App Files Files Community

spoiler-detector / app.py

leoole

Upload app.py

4ba72c4 verified 21 days ago

raw

history blame contribute delete

3.77 kB

	from __future__ import annotations

	from pathlib import Path

	import gradio as gr
	import joblib
	import numpy as np
	from sentence_transformers import SentenceTransformer


	# Directory containing this script; bundled assets are resolved against it.
	ROOT = Path(__file__).resolve().parent
	# Location of the joblib payload that load_pipeline() reads.
	MODEL_PATH = ROOT.joinpath("models", "best_model.joblib")


	# Display metadata for each label name: the emoji and title shown in the
	# result headline, and the sentence shown as the explanation.
	LABEL_DETAILS = {
	    label: {"emoji": emoji, "title": title, "description": description}
	    for label, emoji, title, description in (
	        ("Safe", "🟢", "Safe", "No meaningful spoiler detected."),
	        (
	            "Mild",
	            "🟡",
	            "Mild Spoiler",
	            "Contains broad setup, tone, or non-critical plot information.",
	        ),
	        (
	            "Major",
	            "🔴",
	            "Major Spoiler",
	            "May reveal a key twist, death, identity, ending, or outcome.",
	        ),
	    )
	}


	def load_pipeline() -> tuple[object, SentenceTransformer, list[str]]:
	    """Load the saved classifier and build the sentence embedder it expects.

	    Reads the joblib payload at ``MODEL_PATH`` and looks up its
	    ``"metadata"`` and ``"model"`` entries.

	    Returns:
	        A ``(model, embedder, label_classes)`` tuple: the stored model, a
	        ``SentenceTransformer`` constructed from
	        ``metadata["embedding_model"]``, and ``metadata["label_classes"]``.
	    """
	    # NOTE: joblib.load unpickles arbitrary objects; only point MODEL_PATH at
	    # a file you trust.
	    bundle = joblib.load(MODEL_PATH)
	    info = bundle["metadata"]
	    classifier = bundle["model"]
	    sentence_encoder = SentenceTransformer(info["embedding_model"])
	    return classifier, sentence_encoder, info["label_classes"]


	# Load the classifier, embedder and label list once when the module is
	# imported; analyze_review() reads these module-level objects on every call.
	CLASSIFIER, EMBEDDER, LABEL_CLASSES = load_pipeline()


	def confidence_from_model(model: object, embedding: np.ndarray, label_id: int) -> float:
	    """Return the model's confidence for class index ``label_id``.

	    Strategy, in order of preference:

	    1. ``predict_proba`` - use the reported probability directly.
	    2. ``decision_function`` - apply a softmax to the raw scores. This is
	       a display heuristic, not a calibrated probability.
	    3. Neither method available - return ``0.0``.

	    Args:
	        model: Fitted classifier; only ``predict_proba`` and
	            ``decision_function`` are probed, via ``hasattr``.
	        embedding: Features for a single sample. Only row 0 of the model
	            output is read.
	        label_id: Column index of the class whose confidence is wanted.

	    Returns:
	        The confidence as a plain ``float``.

	    Raises:
	        IndexError: If ``label_id`` is outside the model's output columns.
	    """
	    if hasattr(model, "predict_proba"):
	        probabilities = np.asarray(model.predict_proba(embedding))[0]
	        return float(probabilities[label_id])

	    if hasattr(model, "decision_function"):
	        scores = np.atleast_1d(
	            np.asarray(model.decision_function(embedding), dtype=float)
	        )
	        # A 1-D result is taken as the score row itself; otherwise use the
	        # row belonging to the first (only) sample.
	        row = scores if scores.ndim == 1 else scores[0]
	        if row.size == 1:
	            # Binary classifiers report one signed margin for the positive
	            # class. Softmax over [0, margin] equals the logistic sigmoid and
	            # yields a probability for both class 0 and class 1; a softmax
	            # over the lone margin would always be 1.0 and would have no
	            # entry for label 1.
	            row = np.array([0.0, row[0]])
	        # Subtract the max before exponentiating for numerical stability.
	        shifted = row - np.max(row)
	        weights = np.exp(shifted)
	        probabilities = weights / weights.sum()
	        return float(probabilities[label_id])

	    return 0.0


	def analyze_review(review: str | None) -> tuple[str, str, str]:
	    """Classify one review and format the outcome for the UI.

	    Args:
	        review: Raw text from the review textbox. ``None`` is treated the
	            same as an empty string.

	    Returns:
	        A ``(result, explanation, original)`` tuple. ``result`` is the
	        headline (emoji, title and confidence percentage), ``explanation``
	        is the label's description from ``LABEL_DETAILS``, and ``original``
	        is the review with surrounding whitespace stripped. For blank
	        input the tuple is a prompt message followed by two empty strings.

	    Raises:
	        KeyError: If the predicted label name has no ``LABEL_DETAILS`` entry.
	    """
	    # Guard against None so a cleared or missing textbox value cannot crash
	    # on .strip().
	    text = (review or "").strip()
	    if not text:
	        return "Paste a movie review first.", "", ""

	    # The embedder receives a one-item batch, so index 0 of the prediction
	    # is the result for this review.
	    embedding = EMBEDDER.encode(
	        [text],
	        convert_to_numpy=True,
	        normalize_embeddings=True,
	    )
	    label_id = int(CLASSIFIER.predict(embedding)[0])
	    label = LABEL_CLASSES[label_id]
	    confidence = confidence_from_model(CLASSIFIER, embedding, label_id)
	    details = LABEL_DETAILS[label]

	    result = f"{details['emoji']} {details['title']} ({confidence:.0%})"
	    return result, details["description"], text


	# Sample reviews passed to gr.Examples. Each review is wrapped in its own
	# one-item list, i.e. one row per example.
	EXAMPLES = [
	    [sample]
	    for sample in (
	        "The performances are excellent and the pacing is tense throughout, but I can recommend it without saying anything about the plot.",
	        "The second act has a tense confrontation that changes how the hero sees their mission, but the movie saves its biggest answers for later.",
	        "The final twist reveals that the hero's closest friend was secretly working for the villain the entire time.",
	    )
	]


	# Build the Gradio interface. Components are created in the order written
	# here, so the statement order below should not be rearranged casually.
	# NOTE(review): the nesting under gr.Row() and gr.Accordion() is
	# reconstructed from the blank-line grouping - confirm against the
	# deployed app.
	with gr.Blocks(title="Multi-Source Spoiler Detector") as demo:
	    # Page heading and one-line description.
	    gr.Markdown("# Multi-Source Spoiler Detector")
	    gr.Markdown("Classify a movie review as Safe, Mild Spoiler, or Major Spoiler.")

	    # Input: the review text to classify.
	    with gr.Row():
	        review_input = gr.Textbox(
	            label="Movie review",
	            placeholder="Paste a movie review here...",
	            lines=8,
	        )

	    # Trigger button and the two read-only result fields.
	    analyze_button = gr.Button("Analyze", variant="primary")
	    result_output = gr.Textbox(label="Result", interactive=False)
	    explanation_output = gr.Textbox(label="Why", interactive=False)

	    # Read-only copy of the analyzed text, in an accordion created closed.
	    with gr.Accordion("Original text", open=False):
	        original_output = gr.Textbox(label="Original", lines=6, interactive=False)

	    # Sample reviews from EXAMPLES, targeting the review textbox.
	    gr.Examples(
	        examples=EXAMPLES,
	        inputs=review_input,
	    )

	    # analyze_review returns (result, explanation, original); the outputs
	    # list is given in that same order.
	    analyze_button.click(
	        analyze_review,
	        inputs=review_input,
	        outputs=[result_output, explanation_output, original_output],
	    )


	# Start the Gradio server only when this file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    demo.launch()