# fb/app/services/ai_processor.py
# (scraped header preserved: uploaded by rastof9, commit f788a29, message "test")
from pathlib import Path
import logging
import json
from transformers import pipeline
from textblob import TextBlob
import spacy
import re
logger = logging.getLogger(__name__)
class ProcessingError(Exception):
    """Raised when an advertisement cannot be processed by the AI pipeline."""
class AIPipeline:
    """NLP pipeline combining spaCy and a transformers sentiment model.

    Produces, for a piece of ad text: a sentiment score in [-1, 1],
    up to five topic strings, and a list of named-entity dicts.
    All analysis methods fail soft: on error they log and return a
    neutral/empty value rather than raising.
    """

    def __init__(self):
        """Initialize the AI pipeline with necessary models.

        Raises:
            Exception: re-raised unchanged if either model fails to load
                (e.g. the spaCy model is not installed).
        """
        try:
            # Load spaCy model for NER and topic extraction
            self.nlp = spacy.load('en_core_web_sm')
            # Transformer-based binary (POSITIVE/NEGATIVE) sentiment classifier
            self.sentiment = pipeline(
                'sentiment-analysis',
                model='distilbert-base-uncased-finetuned-sst-2-english',
            )
            logger.info("AI Pipeline initialized successfully")
        except Exception as e:
            logger.error(f"Error initializing AI Pipeline: {e}")
            raise

    def _analyze_sentiment(self, text: str) -> float:
        """Return a sentiment score for *text* in the range [-1.0, 1.0].

        Averages the transformer classifier's signed confidence with
        TextBlob's lexicon-based polarity. Returns 0.0 (neutral) on any
        failure.
        """
        try:
            # truncation=True keeps long ad copy within the model's
            # 512-token limit instead of raising a tensor-size error.
            result = self.sentiment(text, truncation=True)[0]
            # Map the POSITIVE/NEGATIVE label onto a signed confidence.
            if result['label'] == 'POSITIVE':
                transformer_score = result['score']
            else:
                transformer_score = -result['score']
            # TextBlob polarity adds lexicon-based nuance.
            blob_score = TextBlob(text).sentiment.polarity
            # Average the two scores.
            return (transformer_score + blob_score) / 2
        except Exception as e:
            logger.error(f"Error in sentiment analysis: {e}")
            return 0.0

    def _extract_topics(self, text: str) -> list:
        """Return up to five lowercase topic strings from *text*.

        Candidates are noun phrases plus topic-like named entities;
        NOTE: the final cut keeps the first five in *alphabetical*
        order, not by relevance. Returns [] on any failure.
        """
        try:
            doc = self.nlp(text)
            # Noun phrases are candidate topics.
            candidates = [chunk.text.lower() for chunk in doc.noun_chunks]
            # Named entities whose label suggests a topic.
            candidates += [
                ent.text.lower() for ent in doc.ents
                if ent.label_ in ('ORG', 'PRODUCT', 'EVENT', 'WORK_OF_ART')
            ]
            cleaned_topics = []
            for topic in candidates:
                # Strip punctuation/special characters, collapse whitespace.
                topic = re.sub(r'[^\w\s]', '', topic)
                topic = ' '.join(topic.split())
                # Drop very short candidates and bare determiners.
                if len(topic) > 3 and topic not in ('the', 'this', 'that', 'these', 'those'):
                    cleaned_topics.append(topic)
            # De-duplicate, then take the first five alphabetically.
            return sorted(set(cleaned_topics))[:5]
        except Exception as e:
            logger.error(f"Error in topic extraction: {e}")
            return []

    def _extract_entities(self, text: str) -> list:
        """Return all named entities in *text* as dicts.

        Each dict has 'text', 'type' (spaCy label), and 'description'
        (spacy.explain, may be None). Returns [] on any failure.
        """
        try:
            return [
                {
                    'text': ent.text,
                    'type': ent.label_,
                    # Human-readable meaning of the spaCy label code.
                    'description': spacy.explain(ent.label_),
                }
                for ent in self.nlp(text).ents
            ]
        except Exception as e:
            logger.error(f"Error in entity extraction: {e}")
            return []

    def process_ad(self, ad) -> dict:
        """Analyze an ad object and return its full analysis.

        Args:
            ad: any object; its ``content`` attribute (text) is analyzed.

        Returns:
            dict with keys 'sentiment' (float), 'topics' (list of str)
            and 'entities' (list of dict). Neutral/empty defaults are
            returned when the ad has no content or processing fails.
        """
        try:
            # getattr with a default covers both a missing attribute and
            # empty/None content in one falsy check.
            content = getattr(ad, 'content', None)
            if not content:
                return {'sentiment': 0.0, 'topics': [], 'entities': []}
            return {
                'sentiment': self._analyze_sentiment(content),
                'topics': self._extract_topics(content),
                'entities': self._extract_entities(content),
            }
        except Exception as e:
            logger.error(f"Error in ad processing: {e}")
            return {'sentiment': 0.0, 'topics': [], 'entities': []}