File size: 4,746 Bytes
d488241
 
f788a29
 
 
 
 
d488241
 
 
092e58d
 
 
 
d488241
 
f788a29
d488241
f788a29
 
d488241
f788a29
 
 
 
d488241
 
 
 
f788a29
 
d488241
f788a29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d488241
f788a29
 
d488241
f788a29
 
d488241
f788a29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d488241
f788a29
 
d488241
f788a29
 
d488241
f788a29
 
 
 
 
 
 
 
 
 
 
 
d488241
f788a29
 
d488241
f788a29
 
d488241
f788a29
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d488241
f788a29
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
from pathlib import Path
import logging
import json
from transformers import pipeline
from textblob import TextBlob
import spacy
import re

# Module-level logger; handlers/level are expected to be configured by the host application.
logger = logging.getLogger(__name__)

class ProcessingError(Exception):
    """Raised when processing an ad cannot be completed."""

class AIPipeline:
    """NLP pipeline combining spaCy, a transformers sentiment model, and TextBlob.

    Produces per-ad analysis: a sentiment score in [-1, 1], up to five topic
    strings, and a list of named-entity dicts. All analysis methods degrade
    gracefully: failures are logged and a neutral/empty result is returned.
    """

    # Filler words excluded from topic results (applied after the length filter;
    # frozenset gives O(1) membership instead of scanning a list per candidate).
    _TOPIC_STOPWORDS = frozenset({'the', 'this', 'that', 'these', 'those'})

    # spaCy entity labels considered topic-worthy.
    _TOPIC_ENTITY_LABELS = frozenset({'ORG', 'PRODUCT', 'EVENT', 'WORK_OF_ART'})

    def __init__(self):
        """Initialize the AI pipeline with necessary models.

        Loads the spaCy English model (NER / noun chunks) and a DistilBERT
        sentiment classifier fine-tuned on SST-2.

        Raises:
            Exception: any model-loading failure is logged and re-raised so
                callers never receive a half-initialized pipeline.
        """
        try:
            # Load spaCy model for NER and topic extraction
            self.nlp = spacy.load('en_core_web_sm')

            # Initialize sentiment analyzer (POSITIVE/NEGATIVE labels + score)
            self.sentiment = pipeline(
                'sentiment-analysis',
                model='distilbert-base-uncased-finetuned-sst-2-english',
            )

            logger.info("AI Pipeline initialized successfully")
        except Exception:
            # logger.exception records the full traceback, unlike error(f"{e}")
            logger.exception("Error initializing AI Pipeline")
            raise

    @staticmethod
    def _empty_result() -> dict:
        """Return a fresh neutral analysis result (fresh lists so callers may mutate)."""
        return {'sentiment': 0.0, 'topics': [], 'entities': []}

    def _analyze_sentiment(self, text: str) -> float:
        """Analyze sentiment of text and return a score between -1 and 1.

        Averages the transformers model score (sign taken from its
        POSITIVE/NEGATIVE label) with TextBlob's lexicon-based polarity.
        Returns 0.0 (neutral) on any failure.
        """
        try:
            # truncation=True guards against inputs longer than the model's
            # 512-token limit, which would otherwise raise and force the
            # neutral fallback for every long ad.
            result = self.sentiment(text, truncation=True)[0]

            # Convert the POSITIVE/NEGATIVE label into a signed score.
            if result['label'] == 'POSITIVE':
                model_score = result['score']
            else:
                model_score = -result['score']

            # TextBlob polarity adds lexicon-based nuance.
            blob_score = TextBlob(text).sentiment.polarity

            # Average the two signals.
            return (model_score + blob_score) / 2
        except Exception:
            logger.exception("Error in sentiment analysis")
            return 0.0

    def _extract_topics(self, text: str) -> list:
        """Extract main topics from text.

        Candidates are noun phrases plus topic-worthy named entities,
        lowercased, stripped of punctuation, de-duplicated, and returned
        alphabetically sorted, capped at five. Returns [] on failure.
        """
        try:
            doc = self.nlp(text)

            # Candidate topics: noun phrases and selected named entities.
            candidates = [chunk.text.lower() for chunk in doc.noun_chunks]
            candidates += [
                ent.text.lower()
                for ent in doc.ents
                if ent.label_ in self._TOPIC_ENTITY_LABELS
            ]

            cleaned = set()  # set handles de-duplication as we go
            for topic in candidates:
                # Strip punctuation/special characters, collapse whitespace.
                topic = ' '.join(re.sub(r'[^\w\s]', '', topic).split())

                # Drop very short candidates and common filler words.
                if len(topic) > 3 and topic not in self._TOPIC_STOPWORDS:
                    cleaned.add(topic)

            # Deterministic (alphabetical) order, limited to top 5.
            return sorted(cleaned)[:5]
        except Exception:
            logger.exception("Error in topic extraction")
            return []

    def _extract_entities(self, text: str) -> list:
        """Extract named entities from text.

        Returns a list of dicts with 'text', 'type' (spaCy label), and a
        human-readable 'description' of the label. Returns [] on failure.
        """
        try:
            doc = self.nlp(text)
            return [
                {
                    'text': ent.text,
                    'type': ent.label_,
                    'description': spacy.explain(ent.label_),
                }
                for ent in doc.ents
            ]
        except Exception:
            logger.exception("Error in entity extraction")
            return []

    def process_ad(self, ad) -> dict:
        """Process an ad and return analysis results.

        Args:
            ad: object expected to expose a ``content`` string attribute.

        Returns:
            dict with keys 'sentiment' (float in [-1, 1]), 'topics'
            (list of str) and 'entities' (list of dict). A neutral result
            is returned when the ad has no content or processing fails.
        """
        try:
            # Single getattr replaces the hasattr + attribute double-access.
            content = getattr(ad, 'content', None)
            if not content:
                return self._empty_result()

            return {
                'sentiment': self._analyze_sentiment(content),
                'topics': self._extract_topics(content),
                'entities': self._extract_entities(content),
            }
        except Exception:
            logger.exception("Error in ad processing")
            return self._empty_result()