vn6295337 Claude Opus 4.5 committed on
Commit
4ec98e2
·
1 Parent(s): a2c9702

Add VADER sentiment analysis for news and reddit

Browse files

- Add VADER helper functions (_get_vader, _compute_vader_sentiment)
- Extract VADER scores from news headlines in _extract_key_metrics
- Extract VADER scores from reddit post titles
- Update metric reference table to include sentiment metrics (M##)
- Display VADER breakdown in formatted prompt output

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

Files changed (1) hide show
  1. src/nodes/analyzer.py +124 -4
src/nodes/analyzer.py CHANGED
@@ -3,6 +3,62 @@ from langsmith import traceable
3
  import time
4
  import json
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
 
7
  # Financial institution detection for EV/EBITDA exclusion
8
  FINANCIAL_SECTORS = {
@@ -571,21 +627,35 @@ def _extract_key_metrics(raw_data: str) -> dict:
571
  "unemployment": macro_metrics.get("unemployment", {}).get("value"),
572
  }
573
 
574
- # Extract news
575
  news = metrics.get("news", {})
576
  if news and "error" not in news:
577
  articles = news.get("articles", [])
 
 
 
 
 
578
  extracted["news"] = {
579
  "article_count": len(articles),
580
  "headlines": [a.get("title", "")[:100] for a in articles[:5]],
 
581
  }
582
 
583
- # Extract sentiment
584
  sent = metrics.get("sentiment", {})
585
  if sent and "error" not in sent:
 
 
 
 
 
 
 
586
  extracted["sentiment"] = {
587
  "composite_score": sent.get("composite_score"),
588
  "overall_category": sent.get("overall_swot_category"),
 
589
  }
590
 
591
  return extracted
@@ -700,16 +770,21 @@ def _format_metrics_for_prompt(extracted: dict, is_financial: bool = False) -> s
700
  lines.append(f"- Unemployment: {macro['unemployment']:.1f}%")
701
  lines.append("")
702
 
703
- # News
704
  news = extracted.get("news", {})
705
  if news:
706
  lines.append("=== RECENT NEWS ===")
707
  lines.append(f"- Articles found: {news.get('article_count', 0)}")
 
 
 
 
 
708
  for headline in news.get("headlines", []):
709
  lines.append(f" • {headline}")
710
  lines.append("")
711
 
712
- # Sentiment
713
  sent = extracted.get("sentiment", {})
714
  if sent:
715
  lines.append("=== MARKET SENTIMENT ===")
@@ -717,6 +792,11 @@ def _format_metrics_for_prompt(extracted: dict, is_financial: bool = False) -> s
717
  lines.append(f"- Composite Score: {sent['composite_score']:.2f}")
718
  if sent.get("overall_category"):
719
  lines.append(f"- Overall: {sent['overall_category']}")
 
 
 
 
 
720
  lines.append("")
721
 
722
  # Pre-built SWOT hints from MCP servers
@@ -875,6 +955,46 @@ def _generate_metric_reference_table(extracted: dict, is_financial: bool = False
875
  lines.extend(category_lines)
876
  lines.append("")
877
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
878
  lines.append("=" * 60)
879
  lines.append("")
880
 
 
3
  import time
4
  import json
5
 
6
+ # VADER Sentiment Analysis
7
+ from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
8
+
9
+ _vader_analyzer = None
10
+
11
+
12
def _get_vader():
    """Return the shared VADER analyzer, creating it on the first call.

    The instance is stored in the module-level ``_vader_analyzer`` so that
    later calls reuse the same object instead of constructing a new one.
    """
    global _vader_analyzer
    # Fast path: an analyzer was already built by an earlier call.
    if _vader_analyzer is not None:
        return _vader_analyzer
    _vader_analyzer = SentimentIntensityAnalyzer()
    return _vader_analyzer
18
+
19
+
20
def _compute_vader_sentiment(texts: list) -> dict:
    """
    Compute VADER sentiment scores for a list of texts.

    Entries that are empty or are not strings are skipped before scoring.

    Args:
        texts: List of strings (headlines, titles, etc.)

    Returns:
        {
            "avg_compound": 0.42,
            "min_compound": -0.31,
            "max_compound": 0.78,
            "positive_count": 3,
            "negative_count": 1,
            "neutral_count": 1,
            "total_count": 5
        }
        or None if no texts are provided or none of them is a non-empty
        string. The three counts always add up to "total_count".
    """
    if not texts:
        return None

    # Filter first so the analyzer is only requested when there is
    # actually something to score.
    usable = [text for text in texts if text and isinstance(text, str)]
    if not usable:
        return None

    vader = _get_vader()
    scores = [vader.polarity_scores(text)["compound"] for text in usable]

    # VADER's documented cut-offs are inclusive: compound >= 0.05 is
    # positive, compound <= -0.05 is negative, everything strictly in
    # between is neutral.
    total = len(scores)
    positive = sum(1 for s in scores if s >= 0.05)
    negative = sum(1 for s in scores if s <= -0.05)

    return {
        "avg_compound": round(sum(scores) / total, 3),
        "min_compound": round(min(scores), 3),
        "max_compound": round(max(scores), 3),
        "positive_count": positive,
        "negative_count": negative,
        # Whatever is neither positive nor negative is neutral.
        "neutral_count": total - positive - negative,
        "total_count": total,
    }
61
+
62
 
63
  # Financial institution detection for EV/EBITDA exclusion
64
  FINANCIAL_SECTORS = {
 
627
  "unemployment": macro_metrics.get("unemployment", {}).get("value"),
628
  }
629
 
630
+ # Extract news with VADER sentiment
631
  news = metrics.get("news", {})
632
  if news and "error" not in news:
633
  articles = news.get("articles", [])
634
+ headlines = [a.get("title", "") for a in articles if a.get("title")]
635
+
636
+ # Compute VADER sentiment on headlines
637
+ vader_news = _compute_vader_sentiment(headlines)
638
+
639
  extracted["news"] = {
640
  "article_count": len(articles),
641
  "headlines": [a.get("title", "")[:100] for a in articles[:5]],
642
+ "vader_sentiment": vader_news,
643
  }
644
 
645
+ # Extract sentiment with VADER on reddit posts
646
  sent = metrics.get("sentiment", {})
647
  if sent and "error" not in sent:
648
+ # Get reddit posts for VADER analysis
649
+ reddit_posts = sent.get("reddit_posts", [])
650
+ reddit_titles = [p.get("title", "") for p in reddit_posts if p.get("title")]
651
+
652
+ # Compute VADER sentiment on reddit titles
653
+ vader_reddit = _compute_vader_sentiment(reddit_titles)
654
+
655
  extracted["sentiment"] = {
656
  "composite_score": sent.get("composite_score"),
657
  "overall_category": sent.get("overall_swot_category"),
658
+ "vader_reddit": vader_reddit,
659
  }
660
 
661
  return extracted
 
770
  lines.append(f"- Unemployment: {macro['unemployment']:.1f}%")
771
  lines.append("")
772
 
773
+ # News with VADER sentiment
774
  news = extracted.get("news", {})
775
  if news:
776
  lines.append("=== RECENT NEWS ===")
777
  lines.append(f"- Articles found: {news.get('article_count', 0)}")
778
+ # VADER sentiment scores for news
779
+ vader_news = news.get("vader_sentiment")
780
+ if vader_news:
781
+ lines.append(f"- VADER Sentiment: {vader_news['avg_compound']:.2f} (range: {vader_news['min_compound']:.2f} to {vader_news['max_compound']:.2f})")
782
+ lines.append(f" Breakdown: {vader_news['positive_count']} positive, {vader_news['negative_count']} negative, {vader_news['neutral_count']} neutral")
783
  for headline in news.get("headlines", []):
784
  lines.append(f" • {headline}")
785
  lines.append("")
786
 
787
+ # Sentiment with VADER for reddit
788
  sent = extracted.get("sentiment", {})
789
  if sent:
790
  lines.append("=== MARKET SENTIMENT ===")
 
792
  lines.append(f"- Composite Score: {sent['composite_score']:.2f}")
793
  if sent.get("overall_category"):
794
  lines.append(f"- Overall: {sent['overall_category']}")
795
+ # VADER sentiment scores for reddit
796
+ vader_reddit = sent.get("vader_reddit")
797
+ if vader_reddit:
798
+ lines.append(f"- Reddit VADER: {vader_reddit['avg_compound']:.2f} (range: {vader_reddit['min_compound']:.2f} to {vader_reddit['max_compound']:.2f})")
799
+ lines.append(f" Breakdown: {vader_reddit['positive_count']} positive, {vader_reddit['negative_count']} negative, {vader_reddit['neutral_count']} neutral")
800
  lines.append("")
801
 
802
  # Pre-built SWOT hints from MCP servers
 
955
  lines.extend(category_lines)
956
  lines.append("")
957
 
958
+ # Add VADER sentiment metrics (news and reddit)
959
+ sentiment_lines = []
960
+
961
+ # News VADER sentiment
962
+ news_data = extracted.get("news", {})
963
+ if news_data.get("vader_sentiment"):
964
+ vader = news_data["vader_sentiment"]
965
+ ref_id = f"M{mid:02d}"
966
+ formatted = f"{vader['avg_compound']:.2f}"
967
+ sentiment_lines.append(f" {ref_id}: news_sentiment = {formatted} ({vader['total_count']} articles)")
968
+ lookup[ref_id] = {
969
+ "key": "news_sentiment",
970
+ "raw_value": vader['avg_compound'],
971
+ "formatted": formatted,
972
+ "as_of_date": None,
973
+ "category": "sentiment"
974
+ }
975
+ mid += 1
976
+
977
+ # Reddit VADER sentiment
978
+ sent_data = extracted.get("sentiment", {})
979
+ if sent_data.get("vader_reddit"):
980
+ vader = sent_data["vader_reddit"]
981
+ ref_id = f"M{mid:02d}"
982
+ formatted = f"{vader['avg_compound']:.2f}"
983
+ sentiment_lines.append(f" {ref_id}: reddit_sentiment = {formatted} ({vader['total_count']} posts)")
984
+ lookup[ref_id] = {
985
+ "key": "reddit_sentiment",
986
+ "raw_value": vader['avg_compound'],
987
+ "formatted": formatted,
988
+ "as_of_date": None,
989
+ "category": "sentiment"
990
+ }
991
+ mid += 1
992
+
993
+ if sentiment_lines:
994
+ lines.append("[SENTIMENT]")
995
+ lines.extend(sentiment_lines)
996
+ lines.append("")
997
+
998
  lines.append("=" * 60)
999
  lines.append("")
1000