"""AI pipeline for ad content analysis: sentiment, topics, and named entities."""

import json
import logging
import re

import spacy
from textblob import TextBlob
from transformers import pipeline

logger = logging.getLogger(__name__)


class ProcessingError(Exception):
    """Exception raised when ad processing fails."""


class AIPipeline:
    def __init__(self):
        """Initialize the AI pipeline with the required models."""
        try:
            # spaCy model used for noun chunks and named-entity recognition.
            self.nlp = spacy.load('en_core_web_sm')
            # Transformer pipeline for binary (POSITIVE/NEGATIVE) sentiment.
            self.sentiment = pipeline(
                'sentiment-analysis',
                model='distilbert-base-uncased-finetuned-sst-2-english',
            )
            logger.info("AI Pipeline initialized successfully")
        except Exception as e:
            logger.error(f"Error initializing AI Pipeline: {e}")
            raise

    def _analyze_sentiment(self, text: str) -> float:
        """Analyze sentiment of text and return a score between -1 and 1."""
        try:
            result = self.sentiment(text)[0]

            # Map the transformer output onto [-1, 1]: keep the confidence
            # for a POSITIVE label, negate it for a NEGATIVE one.
            if result['label'] == 'POSITIVE':
                score = result['score']
            else:
                score = -result['score']

            # TextBlob polarity already lies in [-1, 1]; average the two
            # signals to smooth out disagreement between the models.
            blob = TextBlob(text)
            blob_score = blob.sentiment.polarity

            return (score + blob_score) / 2
        except Exception as e:
            logger.error(f"Error in sentiment analysis: {e}")
            return 0.0

    def _extract_topics(self, text: str) -> list:
        """Extract main topics from text."""
        try:
            doc = self.nlp(text)

            # Candidate topics: noun phrases plus selected entity types.
            noun_phrases = [chunk.text.lower() for chunk in doc.noun_chunks]
            entities = [ent.text.lower() for ent in doc.ents
                        if ent.label_ in ['ORG', 'PRODUCT', 'EVENT', 'WORK_OF_ART']]
            all_topics = noun_phrases + entities

            # Strip punctuation, collapse whitespace, and drop short or
            # stopword-like candidates.
            cleaned_topics = []
            for topic in all_topics:
                topic = re.sub(r'[^\w\s]', '', topic)
                topic = ' '.join(topic.split())
                if len(topic) > 3 and topic not in ['the', 'this', 'that', 'these', 'those']:
                    cleaned_topics.append(topic)

            # Deduplicate and return at most five topics in a stable order.
            unique_topics = sorted(set(cleaned_topics))
            return unique_topics[:5]
        except Exception as e:
            logger.error(f"Error in topic extraction: {e}")
            return []

    def _extract_entities(self, text: str) -> list:
        """Extract named entities from text."""
        try:
            doc = self.nlp(text)

            # Pair each entity with its label and a human-readable
            # description of that label.
            entities = []
            for ent in doc.ents:
                entities.append({
                    'text': ent.text,
                    'type': ent.label_,
                    'description': spacy.explain(ent.label_),
                })
            return entities
        except Exception as e:
            logger.error(f"Error in entity extraction: {e}")
            return []

    def process_ad(self, ad) -> dict:
        """Process an ad and return analysis results."""
        empty_result = {'sentiment': 0.0, 'topics': [], 'entities': []}
        try:
            # Ads without usable text content get a neutral, empty analysis.
            if not hasattr(ad, 'content') or not ad.content:
                return empty_result

            return {
                'sentiment': self._analyze_sentiment(ad.content),
                'topics': self._extract_topics(ad.content),
                'entities': self._extract_entities(ad.content),
            }
        except Exception as e:
            logger.error(f"Error in ad processing: {e}")
            return empty_result
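

# Minimal usage sketch, not part of the pipeline itself. Assumption: any
# object exposing a non-empty ``content`` string counts as an "ad", so a
# SimpleNamespace stands in for the real ad model here.
if __name__ == '__main__':
    from types import SimpleNamespace

    logging.basicConfig(level=logging.INFO)

    ai = AIPipeline()
    demo_ad = SimpleNamespace(content="Acme's new SuperWidget makes spring cleaning fast and fun!")
    print(json.dumps(ai.process_ad(demo_ad), indent=2))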