File size: 4,746 Bytes
d488241 f788a29 d488241 092e58d d488241 f788a29 d488241 f788a29 d488241 f788a29 d488241 f788a29 d488241 f788a29 d488241 f788a29 d488241 f788a29 d488241 f788a29 d488241 f788a29 d488241 f788a29 d488241 f788a29 d488241 f788a29 d488241 f788a29 d488241 f788a29 d488241 f788a29 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
from pathlib import Path
import logging
import json
from transformers import pipeline
from textblob import TextBlob
import spacy
import re
logger = logging.getLogger(__name__)
class ProcessingError(Exception):
    """Raised when an ad cannot be processed by the AI pipeline."""
class AIPipeline:
    """NLP pipeline that scores sentiment and extracts topics and named
    entities from ad content using spaCy, a transformers sentiment model,
    and TextBlob."""

    # Filler words excluded from topic candidates even when they pass the
    # minimum-length check (e.g. 'this', 'those').
    _STOPWORDS = frozenset({'the', 'this', 'that', 'these', 'those'})

    def __init__(self):
        """Initialize the AI pipeline with necessary models.

        Raises:
            Exception: re-raises any model-loading failure after logging it
                (e.g. OSError when the spaCy model is not installed).
        """
        try:
            # Load spaCy model for NER and topic extraction
            self.nlp = spacy.load('en_core_web_sm')
            # Initialize sentiment analyzer
            self.sentiment = pipeline(
                'sentiment-analysis',
                model='distilbert-base-uncased-finetuned-sst-2-english')
            logger.info("AI Pipeline initialized successfully")
        except Exception:
            logger.exception("Error initializing AI Pipeline")
            raise

    def _analyze_sentiment(self, text: str) -> float:
        """Return a sentiment score in [-1, 1] for *text*.

        Blends the transformer classifier's signed confidence with
        TextBlob's polarity; returns 0.0 (neutral) for empty input or on
        any analysis failure (best-effort by design).
        """
        # Empty/whitespace input is neutral by definition; skip the models.
        if not text or not text.strip():
            return 0.0
        try:
            # truncation=True guards against inputs longer than the model's
            # 512-token limit, which would otherwise raise at inference time
            # and silently collapse every long ad to a 0.0 score.
            result = self.sentiment(text, truncation=True)[0]
            # Map POSITIVE/NEGATIVE label + confidence to a signed score.
            if result['label'] == 'POSITIVE':
                score = result['score']
            else:
                score = -result['score']
            # Average with TextBlob polarity for additional nuance.
            blob_score = TextBlob(text).sentiment.polarity
            return (score + blob_score) / 2
        except Exception:
            logger.exception("Error in sentiment analysis")
            return 0.0

    def _extract_topics(self, text: str) -> list:
        """Return up to five topic strings, in order of first mention.

        Candidates are noun chunks plus ORG/PRODUCT/EVENT/WORK_OF_ART
        entities, lower-cased and stripped of punctuation. Returns [] on
        failure.
        """
        try:
            doc = self.nlp(text)
            # Noun phrases plus topic-like named entities as candidates.
            noun_phrases = [chunk.text.lower() for chunk in doc.noun_chunks]
            entities = [ent.text.lower() for ent in doc.ents
                        if ent.label_ in ('ORG', 'PRODUCT', 'EVENT', 'WORK_OF_ART')]
            cleaned_topics = []
            for topic in noun_phrases + entities:
                # Drop punctuation and collapse internal whitespace.
                topic = ' '.join(re.sub(r'[^\w\s]', '', topic).split())
                # Filter out short fragments and filler words.
                if len(topic) > 3 and topic not in self._STOPWORDS:
                    cleaned_topics.append(topic)
            # Dedupe while preserving first-mention order, then keep the
            # first five. (The previous list(set(...)) + sorted() truncated
            # alphabetically, discarding topics arbitrarily.)
            return list(dict.fromkeys(cleaned_topics))[:5]
        except Exception:
            logger.exception("Error in topic extraction")
            return []

    def _extract_entities(self, text: str) -> list:
        """Return all named entities in *text* as dicts with keys
        'text', 'type' (spaCy label), and 'description' (label gloss).

        Returns [] on failure.
        """
        try:
            doc = self.nlp(text)
            return [{'text': ent.text,
                     'type': ent.label_,
                     'description': spacy.explain(ent.label_)}
                    for ent in doc.ents]
        except Exception:
            logger.exception("Error in entity extraction")
            return []

    def process_ad(self, ad) -> dict:
        """Analyze *ad* and return its sentiment, topics, and entities.

        Args:
            ad: object expected to carry a ``content`` string attribute.

        Returns:
            dict with keys 'sentiment' (float), 'topics' (list[str]),
            'entities' (list[dict]); a neutral/empty result when *ad* has
            no content or processing fails.
        """
        empty_result = {'sentiment': 0.0, 'topics': [], 'entities': []}
        try:
            content = getattr(ad, 'content', None)
            # Nothing to analyze: return the neutral result.
            if not content:
                return empty_result
            return {
                'sentiment': self._analyze_sentiment(content),
                'topics': self._extract_topics(content),
                'entities': self._extract_entities(content),
            }
        except Exception:
            logger.exception("Error in ad processing")
            return empty_result