Spaces:

Chanlefe
/

meme-analyzer

Sleeping

App Files Files Community

meme-analyzer / app.py

Chanlefe

Update app.py

19cc904 verified 7 months ago

raw

history blame contribute delete

6.1 kB

	import logging

	import cv2
	import gradio as gr
	import numpy as np
	import pytesseract
	import torch
	import torch.nn as nn
	from PIL import Image
	from transformers import pipeline, BertTokenizer, CLIPProcessor

	# OCR prerequisites
	# Note: the tesseract-ocr system package must be installed for pytesseract to work.
	# On Hugging Face Spaces, list the package name `tesseract-ocr` in a file
	# called packages.txt (package names only, not an apt-get command).

	class MemeAnalyzerWithOCR:
	    """Analyze meme text and image content with pretrained pipelines.

	    Text comes from an optional manual input and/or OCR of the uploaded
	    image. The combined text is scored for sentiment and for toxicity, and
	    the image (when given) is labelled by an image classifier. The result is
	    returned as a Markdown report string.
	    """

	    # Labels that count as hateful. unitary/toxic-bert reports lower-case
	    # category names, so the comparison is done case-insensitively; the
	    # previous exact match against 'TOXIC' could never succeed.
	    TOXIC_LABELS = frozenset({
	        "toxic",
	        "severe_toxic",
	        "obscene",
	        "threat",
	        "insult",
	        "identity_hate",
	    })

	    # Minimum score for a toxic label to be reported as "Hateful".
	    HATE_THRESHOLD = 0.7

	    # BERT/RoBERTa-base text models are limited to 512 tokens; longer input
	    # (e.g. noisy OCR output) is truncated instead of failing in the model.
	    MAX_TOKENS = 512

	    def __init__(self):
	        """Load the sentiment, toxicity and image-classification pipelines."""
	        # Sentiment analysis for text (Positive, Negative, Neutral)
	        self.text_classifier = pipeline(
	            "sentiment-analysis",
	            model="cardiffnlp/twitter-roberta-base-sentiment-latest"
	        )

	        # Toxicity / hate-speech detection for the complete meme text
	        self.hate_detector = pipeline(
	            "text-classification",
	            model="unitary/toxic-bert"
	        )

	        # Image understanding (not specifically for hate, but for context)
	        self.image_classifier = pipeline(
	            "image-classification",
	            model="google/vit-base-patch16-224"
	        )

	    def extract_text_from_image(self, image):
	        """Extract text from a meme image using OCR.

	        Args:
	            image: A PIL image (anything with a ``convert`` method) or an
	                array that ``numpy.array`` can turn into an RGB image.

	        Returns:
	            The recognized text with surrounding whitespace stripped, or an
	            empty string when OCR fails. OCR is best-effort: failures are
	            logged and never propagate to the caller.
	        """
	        try:
	            # cv2's RGB conversions need exactly three channels; RGBA,
	            # palette and grayscale PIL images would otherwise raise here.
	            if hasattr(image, "convert"):
	                image = image.convert("RGB")

	            # Grayscale + contrast boost to make the text easier to read.
	            gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
	            enhanced = cv2.convertScaleAbs(gray, alpha=1.5, beta=0)

	            return pytesseract.image_to_string(enhanced).strip()
	        except Exception:
	            # Deliberate best-effort boundary, but leave a trace so a missing
	            # tesseract binary or a bad image is diagnosable.
	            logging.getLogger(__name__).warning(
	                "OCR failed; continuing without extracted text",
	                exc_info=True,
	            )
	            return ""

	    def analyze_meme(self, text_input, image):
	        """Run the full analysis and return a Markdown report.

	        Args:
	            text_input: Optional manually entered text (may be None or empty).
	            image: Optional PIL image of the meme (may be None).

	        Returns:
	            A Markdown string with the sentiment, hate-speech verdict, image
	            label (if an image was given) and the analyzed text, or a short
	            message when no text is available from either source.
	        """
	        extracted_text = ""
	        image_content = None

	        # Step 1: OCR and image classification, only when an image is given
	        if image is not None:
	            extracted_text = self.extract_text_from_image(image)
	            image_results = self.image_classifier(image)
	            image_content = image_results[0]['label']

	        # Step 2: Combine manual text with OCR text. Whitespace-only manual
	        # input is treated as absent so it is not sent to the models.
	        manual_text = (text_input or "").strip()
	        combined_text = " ".join(
	            part for part in (manual_text, extracted_text) if part
	        )

	        if not combined_text:
	            return "No text found! Please provide text or an image with text."

	        # Step 3: Sentiment analysis (Positive, Negative, Neutral)
	        sentiment_result = self.text_classifier(
	            combined_text, truncation=True, max_length=self.MAX_TOKENS
	        )[0]
	        sentiment_mapping = {
	            'positive': 'Positive',
	            'negative': 'Negative',
	            'neutral': 'Neutral'
	        }
	        # Unknown labels fall back to Neutral.
	        sentiment_label = sentiment_mapping.get(
	            sentiment_result['label'].lower(), 'Neutral'
	        )
	        sentiment_score = sentiment_result['score']

	        # Step 4: Hate speech detection on the top-scoring label
	        hate_result = self.hate_detector(
	            combined_text, truncation=True, max_length=self.MAX_TOKENS
	        )[0]
	        is_hateful = (
	            str(hate_result['label']).lower() in self.TOXIC_LABELS
	            and hate_result['score'] > self.HATE_THRESHOLD
	        )
	        if is_hateful:
	            hate_label = 'Hateful'
	            hate_score = hate_result['score']
	        else:
	            # Report confidence in the "non-hateful" verdict instead.
	            hate_label = 'Non-hateful'
	            hate_score = 1 - hate_result['score']

	        # Step 5: Format results
	        sections = ["## 📊 Meme Analysis Results\n\n"]

	        if extracted_text:
	            sections.append(
	                f"### 🔍 Text Extracted from Image (OCR):\n`{extracted_text}`\n\n"
	            )

	        sections.append("### 😊 Sentiment Analysis (BERT):\n")
	        sections.append(
	            f"{sentiment_label} (Confidence: {sentiment_score:.1%})\n\n"
	        )

	        sections.append("### 🚫 Hate Speech Detection:\n")
	        sections.append(f"{hate_label} (Confidence: {hate_score:.1%})\n\n")

	        if image_content:
	            sections.append(f"### 🖼️ Image Content:\n{image_content}\n\n")

	        sections.append("### 📝 Analyzed Text:\n")
	        sections.append(f"`{combined_text}`\n\n")

	        if is_hateful:
	            sections.append(
	                "⚠️ Warning: This content may contain hateful or offensive material.\n"
	            )

	        return "".join(sections)

	# Build the analyzer once at module load so the model pipelines are created
	# a single time and shared by every request.
	analyzer = MemeAnalyzerWithOCR()

	# Gradio interface: optional manual text plus an optional meme image in,
	# Markdown report out. `demo` stays a module-level name so it can be
	# imported or picked up by tooling without starting the server.
	demo = gr.Interface(
	    fn=analyzer.analyze_meme,
	    inputs=[
	        gr.Textbox(
	            label="📝 Manual Text Input (Optional)",
	            placeholder="Enter text if not in image...",
	        ),
	        gr.Image(
	            label="📸 Upload Meme Image",
	            type="pil",
	        )
	    ],
	    outputs=gr.Markdown(label="Analysis Results"),
	    title="🎭 Meme Analyzer with OCR",
	    description="""
	This tool analyzes memes by:
	1. Extracting text from images using OCR
	2. Sentiment analysis (Positive/Negative/Neutral) using BERT
	3. Hate speech detection (Hateful/Non-hateful)
	4. Image content analysis

	Upload a meme image and/or provide text to analyze!
	""",
	    # Text-only examples; the image slot is left empty.
	    examples=[
	        ["This is hilarious!", None],
	        ["I hate everyone", None]
	    ],
	    theme=gr.themes.Soft()
	)

	# Launch only when executed as a script (e.g. `python app.py`), so that
	# importing this module does not start a blocking web server.
	if __name__ == "__main__":
	    demo.launch()

	# For Hugging Face Spaces, create these additional files next to app.py.
	# The bare triple-quoted strings below are no-op expression statements used
	# only as notes; copy their contents into the named files.

	# requirements.txt (Python packages, listed one per line below):
	"""
	gradio
	torch
	transformers
	pillow
	opencv-python
	pytesseract
	numpy
	"""

	# packages.txt (for system dependencies; package names only):
	"""
	tesseract-ocr
	"""