Spaces:
Runtime error
from agency_swarm.tools import BaseTool
from pydantic import Field
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import spacy

# Ensure the VADER lexicon is available before any sentiment analysis runs.
# quiet=True suppresses the download progress output on every import.
nltk.download('vader_lexicon', quiet=True)
class FeedbackTextAnalyzer(BaseTool):
    """
    Processes and analyzes a collection of feedback texts.

    Each entry is preprocessed with spaCy (lemmatization, stop-word and
    punctuation removal), scored for sentiment with NLTK's VADER analyzer,
    and the whole corpus is mined for key themes with scikit-learn's LDA.
    """
    # Raw feedback entries; each element is expected to be a text string.
    feedback_data: list = Field(
        ..., description="A list of feedback text data to be analyzed."
    )
    # Upper bound on the number of LDA topics to extract.
    n_themes: int = Field(
        5, description="The number of key themes to extract from the feedback data."
    )

    def run(self):
        """
        Analyze the provided feedback data.

        Returns:
            dict: {"sentiments": list of VADER polarity-score dicts (one per
                   feedback entry, same order as the input),
                   "themes": list of {"theme": index, "keywords": [...]}}.
                   Both lists are empty when feedback_data is empty.
        """
        if not self.feedback_data:
            # Nothing to analyze; return an empty, well-formed result instead
            # of letting the vectorizer crash on an empty corpus.
            return {"sentiments": [], "themes": []}

        # Load the spaCy pipeline once and reuse it for every entry.
        nlp = spacy.load("en_core_web_sm")
        processed_feedback = [self._preprocess_text(nlp, text) for text in self.feedback_data]

        # Instantiate the analyzer once, not once per feedback entry.
        sia = SentimentIntensityAnalyzer()
        sentiments = [sia.polarity_scores(text) for text in processed_feedback]

        themes = self._extract_themes(processed_feedback)
        return {
            "sentiments": sentiments,
            "themes": themes
        }

    def _preprocess_text(self, nlp, text):
        """
        Lemmatize *text* and drop stop words and punctuation.

        Args:
            nlp: a loaded spaCy Language pipeline.
            text: the raw feedback string.

        Returns:
            str: space-joined lemmas of the remaining tokens.
        """
        doc = nlp(text)
        return " ".join(
            token.lemma_ for token in doc if not token.is_stop and not token.is_punct
        )

    def _analyze_sentiment(self, text):
        """
        Score the sentiment of *text* with NLTK's VADER analyzer.

        Kept for backward compatibility; run() now reuses a single shared
        analyzer instance instead of calling this per entry.
        """
        sia = SentimentIntensityAnalyzer()
        return sia.polarity_scores(text)

    def _extract_themes(self, processed_feedback):
        """
        Extract up to ``self.n_themes`` key themes from the corpus via LDA.

        Returns:
            list[dict]: one {"theme": index, "keywords": [up to 10 words,
            most relevant first]} per topic. Returns [] when the corpus is
            too small for the vectorizer's max_df/min_df pruning to leave
            any terms (previously this raised an unhandled ValueError).
        """
        vectorizer = CountVectorizer(max_df=0.95, min_df=2, stop_words='english')
        try:
            dtm = vectorizer.fit_transform(processed_feedback)
        except ValueError:
            # Empty vocabulary after pruning (e.g. very few or very short
            # feedback entries) — no themes can be extracted.
            return []

        # Clamp the topic count so LDA never asks for more topics than terms.
        n_components = max(1, min(self.n_themes, dtm.shape[1]))
        lda = LatentDirichletAllocation(n_components=n_components, random_state=42)
        lda.fit(dtm)

        # Hoist the vocabulary lookup out of the loop; get_feature_names_out()
        # builds a fresh array on every call.
        feature_names = vectorizer.get_feature_names_out()
        themes = []
        for index, topic in enumerate(lda.components_):
            # argsort is ascending, so reverse the last 10 indices to list
            # keywords from most to least relevant.
            top_words = [feature_names[i] for i in topic.argsort()[-10:][::-1]]
            themes.append({"theme": index, "keywords": top_words})
        return themes