Spaces:

1ETERNAL
/

Sentilyze-APP

Runtime error

Sentilyze-APP / backend /sarcasm_detector.py

Numan sheikh

Upload Sentilyze app files to Hugging Face Space

7fb74eb 8 months ago

4.49 kB

	# backend/sarcasm_detector.py

	from transformers import pipeline

	# Module-level cache for the sarcasm/irony detection pipeline. It starts as None
	# and is populated by _load_sarcasm_pipeline() on first use, not at import time.
	_sarcasm_pipeline = None

	def _load_sarcasm_pipeline():
	    """Return the cached sarcasm/irony pipeline, creating it on first use.

	    The pipeline is stored in the module-level ``_sarcasm_pipeline`` so that
	    later calls reuse it. If creation fails, the error is printed, the cache
	    stays ``None`` and ``None`` is returned; the next call will try again.
	    """
	    global _sarcasm_pipeline

	    # Fast path: a pipeline was already built by an earlier call.
	    if _sarcasm_pipeline is not None:
	        return _sarcasm_pipeline

	    try:
	        # cardiffnlp/twitter-roberta-base-irony is used for general
	        # irony/sarcasm detection.
	        _sarcasm_pipeline = pipeline(
	            "text-classification",
	            model="cardiffnlp/twitter-roberta-base-irony",
	        )
	        print("Sarcasm/Irony detection pipeline loaded successfully.")
	    except Exception as err:
	        print(f"Could not load sarcasm/irony detection pipeline. Ensure 'transformers' and 'torch' are installed. Error: {err}")
	        # Leave the cache empty so callers can detect the failure.
	        _sarcasm_pipeline = None

	    return _sarcasm_pipeline

	def detect_sarcasm_and_highlight(sentence: str) -> dict:
	    """Estimate how sarcastic/ironic a sentence is and mark it up if it is.

	    Highlighting is sentence-level only: when the sarcasm percentage exceeds
	    50, the whole sentence is wrapped in ``<mark>...</mark>``. Pinpointing
	    the specific sarcastic words would need token-level analysis, which this
	    function does not attempt.

	    Args:
	        sentence (str): The input sentence to analyze.

	    Returns:
	        dict: Always contains the keys ``sarcasm_percent`` (float percentage),
	        ``highlighted_sentence`` and ``predicted_sarcasm_label``
	        (``"sarcastic"``, ``"not sarcastic"`` or ``"unknown label"``).
	        When the input is not a string or the model could not be loaded,
	        the result is a zero-score fallback that also carries a ``note``
	        key and returns ``sentence`` unchanged.
	    """
	    # Validate the input first so that bad input never triggers a model load.
	    pipeline_instance = _load_sarcasm_pipeline() if isinstance(sentence, str) else None
	    if pipeline_instance is None:
	        # Same keys as the success result (plus "note") so callers can index
	        # "predicted_sarcasm_label" without risking a KeyError.
	        return {
	            "sarcasm_percent": 0.0,
	            "highlighted_sentence": sentence,
	            "predicted_sarcasm_label": "unknown label",
	            "note": "Model not loaded or invalid input.",
	        }

	    results = pipeline_instance(sentence)
	    sarcasm_percent = 0.0
	    predicted_label = "not sarcastic"  # Default when the pipeline returns nothing

	    if results:
	        # The code expects the label 'irony' for ironic/sarcastic text and
	        # 'non_irony' otherwise; only the first result is used.
	        main_result = results[0]
	        predicted_label_raw = main_result["label"]
	        score = main_result["score"]

	        if predicted_label_raw == "irony":
	            sarcasm_percent = round(score * 100, 2)
	            predicted_label = "sarcastic"
	        elif predicted_label_raw == "non_irony":
	            # The score is the confidence in NOT being sarcastic, so the
	            # sarcasm share is its complement.
	            sarcasm_percent = round((1 - score) * 100, 2)
	            predicted_label = "not sarcastic"
	        else:
	            # Unexpected label: report zero sarcasm and flag it for the UI.
	            predicted_label = "unknown label"

	    # Arbitrary threshold: above 50% the whole sentence is highlighted.
	    # NOTE(review): the sentence is inserted unescaped. If the frontend
	    # renders this value as HTML, it should be escaped first - confirm
	    # against the caller.
	    highlighted_sentence = sentence
	    if sarcasm_percent > 50:
	        highlighted_sentence = f"<mark>{sentence}</mark>"

	    return {
	        "sarcasm_percent": sarcasm_percent,
	        "highlighted_sentence": highlighted_sentence,
	        "predicted_sarcasm_label": predicted_label,
	    }

	# Manual smoke test: run this module directly to print the detector's
	# output for a handful of sample sentences.
	if __name__ == "__main__":
	    print("--- Testing Sarcasm/Irony Detection (New Model) ---")
	    sample_sentences = (
	        # Ideally detected as sarcastic.
	        "Wow, I just love waiting in traffic for two hours—it’s the highlight of my day!",
	        "Oh, great, another Monday.",
	        "I just love getting stuck in traffic for hours.",
	        # Should be not sarcastic.
	        "The sun is shining brightly today.",
	        # Clearly sarcastic.
	        "You're a genius! (said after someone made a foolish mistake)",
	    )

	    for sample in sample_sentences:
	        print(f"'{sample}' -> {detect_sarcasm_and_highlight(sample)}")