Brajmovech commited on
Commit
8bde49a
·
1 Parent(s): e3f4ae3

feat: port LLM-based news filtering from local version

Browse files

- iris_mvp.py: add TICKER_BRAND_TERMS dict, add module-level
llm_filter_headlines() with gpt-4o-mini and keyword fallback,
replace regex-based analyze_news() with two-phase collect β†’ LLM filter
- Dockerfile: add OPENAI_MODEL_FILTER=gpt-4o-mini env var
- run_daily.py: add feedback log URL reminder in docstring

Files changed (3) hide show
  1. Dockerfile +2 -1
  2. iris_mvp.py +281 -200
  3. run_daily.py +4 -0
Dockerfile CHANGED
@@ -5,7 +5,8 @@ RUN useradd -m -u 1000 user
5
  USER user
6
  ENV HOME=/home/user \
7
  PATH=/home/user/.local/bin:$PATH \
8
- DEMO_MODE=true
 
9
 
10
  WORKDIR $HOME/app
11
 
 
5
  USER user
6
  ENV HOME=/home/user \
7
  PATH=/home/user/.local/bin:$PATH \
8
+ DEMO_MODE=true \
9
+ OPENAI_MODEL_FILTER=gpt-4o-mini
10
 
11
  WORKDIR $HOME/app
12
 
iris_mvp.py CHANGED
@@ -57,6 +57,151 @@ COMPANY_NAME_TO_TICKERS = {
57
  "NIKE": ["NKE"],
58
  }
59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  def normalize_ticker_symbol(symbol: str):
62
  token = str(symbol or "").strip().upper()
@@ -602,248 +747,184 @@ class IRIS_System:
602
  return self._simulated_market_data(ticker)
603
 
604
  def analyze_news(self, ticker):
605
- """Fetches headlines and calculates a Sentiment Score (-1.0 to +1.0)."""
 
 
 
 
606
  ticker_symbol = normalize_ticker_symbol(ticker).upper()
607
- headlines = []
608
- seen = set()
609
-
610
- # Build strict relevance terms: ticker + known company names mapped to this ticker.
611
- relevance_terms = {ticker_symbol}
612
- for company_name, tickers in COMPANY_NAME_TO_TICKERS.items():
613
- normalized = normalize_ticker_list(tickers)
614
- if ticker_symbol in normalized:
615
- relevance_terms.add(str(company_name or "").upper())
616
-
617
- def _normalize_title(t):
618
- """Lowercase, strip punctuation for fuzzy dedup."""
619
- return re.sub(r'[^a-z0-9 ]', '', str(t or '').lower().strip())
620
-
621
- _FINANCIAL_TERMS = re.compile(
622
- r'\b(stock|share|price|market|earn|revenue|profit|loss|invest|'
623
- r'analyst|quarter|fiscal|IPO|valuat|forecast|guidance|trade|'
624
- r'fund|ETF|NYSE|NASDAQ|SEC|CEO|CFO|board|dividend|rally|'
625
- r'downgrade|upgrade|outlook|chip|semiconductor|AI|cloud|'
626
- r'data.?center|GPU|compute)\b',
627
  re.IGNORECASE,
628
  )
629
- _NOISE_PATTERNS = [
630
- r'\b(1080p|720p|480p|2160p|4K|BluRay|WEB-?DL|WEBRip|HDTV|DVDRip|BRRip)\b',
631
- r'\b(x264|x265|H\.?264|H\.?265|HEVC|AVC|AAC|AC3|DTS|FLAC|MP4|MKV|AVI)\b',
632
- r'\b(S\d{2}E\d{2}|S\d{2}-S\d{2})\b',
633
- r'\b(YIFY|RARBG|EZTV|BobDobbs|playWEB|Kitsune|TEPES|RAWR|MiXED|SPARKS)\b',
634
- r'\b(torrent|magnet|repack|proper|extended\.cut|theatrical)\b',
635
- r'(?i)\.\s*(mkv|mp4|avi|mov|wmv|flv)\b',
636
- ]
637
 
638
- def add_relevant_article(title, url="", description="", published_at=""):
639
- if len(headlines) >= 15:
 
 
640
  return
641
- clean_title = str(title or "").strip()
642
- if not clean_title:
643
  return
644
- clean_url = str(url or "").strip()
645
- clean_description = str(description or "").strip()
646
-
647
- combined_text = f"{clean_title} {clean_description}"
648
- is_relevant = False
649
- for term in relevance_terms:
650
- term_upper = str(term or "").upper().strip()
651
- if not term_upper:
652
- continue
653
- pattern = r'\b' + re.escape(term_upper) + r'\b'
654
- if re.search(pattern, combined_text, re.IGNORECASE):
655
- is_relevant = True
656
- if not _FINANCIAL_TERMS.search(combined_text):
657
- is_relevant = False
658
- break
659
- if not is_relevant:
660
  return
661
-
662
- for _pat in _NOISE_PATTERNS:
663
- if re.search(_pat, clean_title, re.IGNORECASE):
664
- return
665
-
666
- # Deduplicate by exact (title, url) and by normalized title
667
- norm_title = _normalize_title(clean_title)
668
- if clean_url in seen or norm_title in seen:
669
  return
670
- seen.add(clean_url)
671
- seen.add(norm_title)
672
-
673
- # Reject known non-article URL patterns (consent pages, trackers, etc.)
674
- if clean_url:
675
- _bad_url_patterns = [
676
- r'consent\.(yahoo|google|msn)\.',
677
- r'/v2/collectConsent',
678
- r'accounts\.google\.com',
679
- r'login\.|signin\.',
680
- r'\btracking\b',
681
- ]
682
- if any(re.search(p, clean_url, re.IGNORECASE) for p in _bad_url_patterns):
683
- return
684
-
685
- # Validate URL: only reject connection/DNS failures and 5xx errors.
686
- # 4xx (including paywalls) are kept β€” users can still open those links.
687
- if clean_url:
688
- try:
689
- import urllib.request as _urlreq
690
- req = _urlreq.Request(
691
- clean_url,
692
- headers={'User-Agent': 'Mozilla/5.0 (compatible; IRIS-AI/1.0)'},
693
- )
694
- with _urlreq.urlopen(req, timeout=4) as resp:
695
- if resp.status >= 500:
696
- return # server error β€” drop
697
- except Exception as _e:
698
- err_str = str(_e).lower()
699
- # Drop only on DNS/connection failure; keep on HTTP errors (paywall etc.)
700
- if any(k in err_str for k in ('name or service', 'nodename', 'connection refused',
701
- 'no route', 'network unreachable', 'timed out',
702
- 'ssl', 'certificate')):
703
- return
704
-
705
- headlines.append({
706
- "title": clean_title,
707
- "url": clean_url,
708
  "published_at": str(published_at or "").strip(),
709
  })
710
 
711
- # Preferred source: NewsAPI (if configured) for a larger headline baseline.
712
  if self.news_api:
713
  try:
714
  response = self.news_api.get_everything(
715
- q=ticker,
716
  language="en",
717
  sort_by="publishedAt",
718
- page_size=15,
719
  )
720
- if isinstance(response, dict):
721
- for article in response.get("articles", []) or []:
722
- if not isinstance(article, dict):
723
- continue
724
- title = str(article.get("title", "")).strip()
725
- url = article.get("url", "")
726
- description = article.get("description", "")
727
- add_relevant_article(
728
- title=title,
729
- url=url,
730
- description=description,
731
- published_at=article.get("publishedAt", ""),
732
- )
733
- if len(headlines) >= 15:
734
- break
735
- except Exception:
736
- headlines = []
737
- seen = set()
738
 
739
- # Second source: Webz.io News API (supplements NewsAPI or runs standalone).
740
- if self.webz_api_key and len(headlines) < 15:
741
  try:
742
  import urllib.request as _urlreq, urllib.parse as _urlparse
743
  _params = _urlparse.urlencode({
744
  "token": self.webz_api_key,
745
- "q": f'"{ticker}" language:english',
746
  "sort": "published",
747
  "order": "desc",
748
- "size": 20,
749
  "format": "json",
750
  })
751
- _webz_url = f"https://api.webz.io/newsApiLite?{_params}"
752
- _req = _urlreq.Request(_webz_url, headers={"Accept": "application/json"})
 
 
753
  with _urlreq.urlopen(_req, timeout=8) as _resp:
754
- _webz_data = json.loads(_resp.read().decode("utf-8"))
755
- for _post in _webz_data.get("posts", []) or []:
756
- if not isinstance(_post, dict):
757
  continue
758
- add_relevant_article(
759
- title=_post.get("title", ""),
760
- url=_post.get("url", ""),
761
- description=_post.get("text", "")[:300],
762
- published_at=_post.get("published", ""),
763
  )
764
- if len(headlines) >= 15:
765
- break
766
- except Exception:
767
- pass
768
 
769
- # Fallback: existing yfinance extraction when NewsAPI is unavailable/failed/empty.
770
- if not headlines:
771
  try:
772
  stock = yf.Ticker(ticker)
773
- news_items = stock.news
774
- if news_items:
775
- for item in news_items[:30]:
776
- if not isinstance(item, dict):
777
- continue
778
- content = item.get("content") if isinstance(item.get("content"), dict) else {}
779
- title = item.get("title") or content.get("title") or ""
780
- description = item.get("description") or item.get("summary") or content.get("description") or content.get("summary") or ""
781
- url = item.get("link") or item.get("url") or content.get("link") or content.get("url") or ""
782
- _pub = item.get("providerPublishTime") or \
783
- (item.get("content") or {}).get("pubDate", "")
784
- add_relevant_article(
785
- title=title,
786
- url=url,
787
- description=description,
788
- published_at=_pub,
789
- )
790
- if len(headlines) >= 15:
791
- break
792
- except Exception:
793
- pass
794
-
795
- # Fallback: Simulation Mode (If internet/API failure)
796
- if not headlines:
797
- if ticker == "TSLA":
798
- simulation_items = [
799
  {"title": "Tesla recalls 2 million vehicles due to autopilot risk", "url": ""},
800
  {"title": "Analysts downgrade Tesla stock amid slowing EV demand", "url": ""},
801
- ]
802
- elif ticker == "NVDA":
803
- simulation_items = [
804
- {"title": "Nvidia announces fantastic breakthrough AI chip", "url": ""},
805
- {"title": "Nvidia quarterly revenue brilliantly beats expectations by 20%", "url": ""},
806
- ]
807
- else:
808
- simulation_items = [
809
- {"title": f"{ticker_symbol} announces date for shareholder meeting", "url": ""},
810
- {"title": f"{ticker_symbol} news flow remains active amid market volatility", "url": ""},
811
- ]
812
- for entry in simulation_items:
813
- add_relevant_article(
814
- title=entry.get("title", ""),
815
- url=entry.get("url", ""),
816
- description="",
817
- )
 
 
 
 
818
 
819
- # Analyze Sentiment using FinBERT
820
- total_score = 0
821
- valid_headlines = 0
822
-
 
 
 
 
823
  if self.sentiment_analyzer and headlines:
824
- for headline in headlines:
 
 
 
825
  try:
826
- title_text = str(headline.get("title", "")).strip() if isinstance(headline, dict) else str(headline or "").strip()
827
- if not title_text:
828
- continue
829
- # FinBERT returns labels like 'positive', 'negative', 'neutral'
830
  result = self.sentiment_analyzer(title_text)[0]
831
- label = result['label']
832
- score = result['score'] # Confidence score 0 to 1
833
-
834
- if label == 'positive':
835
  total_score += score
836
- valid_headlines += 1
837
- elif label == 'negative':
838
  total_score -= score
839
- valid_headlines += 1
840
- else: # neutral
841
- valid_headlines += 1
842
- # neutral adds 0 to total score
843
  except Exception:
844
  pass
845
-
846
- avg_score = total_score / valid_headlines if valid_headlines > 0 else 0
847
  return avg_score, headlines
848
 
849
  def predict_trend(self, data, sentiment_score):
 
57
  "NIKE": ["NKE"],
58
  }
59
 
60
# Per-ticker brand, product, and executive terms. Consumed by the keyword
# fallback in llm_filter_headlines() so that headlines mentioning a company
# indirectly (product names, CEOs) still match even when the ticker symbol
# itself never appears in the title.
TICKER_BRAND_TERMS = {
    "AAPL": ["iPhone", "iPad", "MacBook", "Apple Watch", "AirPods",
             "App Store", "Apple Intelligence", "Vision Pro", "iOS",
             "macOS", "Tim Cook"],
    "MSFT": ["Windows", "Azure", "Copilot", "Office", "Xbox",
             "Teams", "Satya Nadella", "GitHub"],
    "NVDA": ["GeForce", "Blackwell", "Hopper", "Jensen Huang",
             "CUDA", "DGX", "NIM"],
    "GOOG": ["Google", "Gemini", "YouTube", "Waymo", "DeepMind",
             "Pixel", "Sundar Pichai", "Android", "Chrome"],
    "AMZN": ["Amazon", "AWS", "Alexa", "Prime", "Kindle",
             "Andy Jassy", "Twitch"],
    "META": ["Facebook", "Instagram", "WhatsApp", "Threads",
             "Zuckerberg", "Ray-Ban", "Llama"],
    "TSLA": ["Tesla", "Cybertruck", "Model 3", "Model Y",
             "Autopilot", "FSD", "Elon Musk", "Gigafactory",
             "Powerwall"],
    "NKE": ["Nike", "Jordan", "Air Max", "Swoosh"],
}
79
+
80
+
81
def llm_filter_headlines(
    ticker: str,
    candidates: list,
    *,
    max_keep: int = 12,
    model=None,
) -> list:
    """
    Use an LLM to decide which raw headline candidates are worth
    showing on the IRIS dashboard for the given ticker.

    Each candidate dict has keys: title, url, published_at.

    Returns a filtered + ordered list (most relevant first),
    capped at max_keep entries.

    Falls back to a simple keyword allowlist if:
      - OPENAI_API_KEY is not set
      - the openai package is not installed
      - the API call fails for any reason
    """
    if not candidates:
        return []

    api_key = os.environ.get("OPENAI_API_KEY", "").strip()
    _model = model or os.environ.get("OPENAI_MODEL_FILTER", "gpt-4o-mini")

    # ── LLM path ────────────────────────────────────────────────────
    if api_key:
        try:
            from openai import OpenAI as _OAI
            _client = _OAI(api_key=api_key)

            lines = []
            for i, h in enumerate(candidates):
                title = str(h.get("title", "")).strip()
                lines.append(f"{i}: {title}")
            numbered = "\n".join(lines)

            prompt = f"""You are a financial news relevance classifier for the stock ticker "{ticker}".

Below is a numbered list of raw news headline candidates. Your job is to select which ones belong on a stock market dashboard for "{ticker}".

INCLUDE a headline if it is about ANY of:
- The company, its products, services, executives, or earnings
- Analyst ratings, price targets, or institutional activity for the stock
- Direct competitors that affect the stock's valuation
- Macroeconomic events that move the sector (interest rates, inflation, GDP)
- Geopolitical events that affect the supply chain, regulation, or demand for this company
- Industry trends directly relevant to this company's business

EXCLUDE a headline if:
- It is about a completely unrelated company that happens to share a word with the ticker
- It is entertainment, sports, lifestyle, or celebrity content
- It is a video/torrent/piracy listing
- It has no conceivable link to the stock's price or business

Respond with ONLY a JSON object in this exact format — no markdown, no explanation:
{{"keep": [list of integer indices to include], "reason": "one sentence summary of what you filtered"}}

Headlines:
{numbered}"""

            resp = _client.chat.completions.create(
                model=_model,
                messages=[
                    {"role": "system", "content": "You are a precise financial news classifier. Output only valid JSON."},
                    {"role": "user", "content": prompt},
                ],
                temperature=0.0,
                max_tokens=300,
            )
            raw = (resp.choices[0].message.content or "").strip()
            # Strip a markdown code fence if the model ignored the
            # "no markdown" instruction.
            if raw.startswith("```"):
                raw = raw.split("\n", 1)[-1]
                raw = raw.rsplit("```", 1)[0].strip()

            parsed = json.loads(raw)
            # Bug fix: deduplicate and bounds-check the model-returned
            # indices while preserving order — a response like [0, 0, 1]
            # previously produced duplicate headlines.
            keep_indices = []
            for idx in parsed.get("keep", []):
                idx = int(idx)
                if 0 <= idx < len(candidates) and idx not in keep_indices:
                    keep_indices.append(idx)
            reason = parsed.get("reason", "")
            print(f"[LLM FILTER] {ticker}: keeping {len(keep_indices)}/{len(candidates)} "
                  f"headlines via {_model}. Reason: {reason}")

            return [candidates[idx] for idx in keep_indices[:max_keep]]

        except Exception as _llm_err:
            print(f"[LLM FILTER] API call failed ({type(_llm_err).__name__}: {_llm_err}), "
                  f"falling back to keyword filter.")

    # ── Keyword fallback (no API key or API failure) ─────────────────
    print(f"[LLM FILTER] Using keyword fallback for {ticker}.")
    ticker_upper = ticker.upper()

    # Allowlist = ticker + mapped company names + brand/product terms.
    allow_terms = {ticker_upper}
    for company_name, tickers in COMPANY_NAME_TO_TICKERS.items():
        if ticker_upper in normalize_ticker_list(tickers):
            allow_terms.add(company_name.upper())

    brand_map = globals().get("TICKER_BRAND_TERMS", {})
    for brand in brand_map.get(ticker_upper, []):
        allow_terms.add(str(brand).upper())

    _NOISE = re.compile(
        r'\b(1080p|720p|BluRay|WEB-?DL|x264|x265|HEVC|S\d{2}E\d{2}|'
        r'torrent|YIFY|RARBG|EZTV|mkv|mp4)\b',
        re.IGNORECASE,
    )

    # Bug fix: match allow-terms on word boundaries instead of plain
    # substring containment — substring matching let short tickers match
    # inside unrelated words (e.g. "NKE" inside "BANKERS").
    terms = sorted(t for t in allow_terms if t)
    if not terms:
        return []
    _ALLOW = re.compile(
        r'\b(' + '|'.join(re.escape(t) for t in terms) + r')\b',
        re.IGNORECASE,
    )

    kept = []
    for h in candidates:
        title = str(h.get("title", "")).strip()
        if not title:
            continue
        if _NOISE.search(title):
            continue
        if _ALLOW.search(title):
            kept.append(h)
        if len(kept) >= max_keep:
            break
    return kept
204
+
205
 
206
  def normalize_ticker_symbol(symbol: str):
207
  token = str(symbol or "").strip().upper()
 
747
  return self._simulated_market_data(ticker)
748
 
749
  def analyze_news(self, ticker):
750
+ """
751
+ Fetches raw headlines from all available sources, then uses
752
+ llm_filter_headlines() to select those relevant to the ticker.
753
+ Returns (sentiment_score: float, headlines: list[dict]).
754
+ """
755
  ticker_symbol = normalize_ticker_symbol(ticker).upper()
756
+ raw_candidates = []
757
+ seen_urls = set()
758
+ seen_titles = set()
759
+
760
+ def _norm_title(t):
761
+ return re.sub(r'[^a-z0-9 ]', '', t.lower().strip())
762
+
763
+ def _bad_url(url):
764
+ _BAD = re.compile(
765
+ r'consent\.(yahoo|google|msn)\.|/v2/collectConsent|'
766
+ r'accounts\.google\.com|login\.|signin\.|tracking',
767
+ re.IGNORECASE,
768
+ )
769
+ return bool(_BAD.search(url))
770
+
771
+ _NOISE = re.compile(
772
+ r'\b(1080p|720p|480p|4K|BluRay|WEB-?DL|WEBRip|HDTV|DVDRip|'
773
+ r'x264|x265|H\.?264|H\.?265|HEVC|AAC|AC3|DTS|MKV|AVI|'
774
+ r'S\d{2}E\d{2}|torrent|magnet|repack|'
775
+ r'YIFY|RARBG|EZTV|BobDobbs|playWEB|SPARKS)\b',
776
  re.IGNORECASE,
777
  )
 
 
 
 
 
 
 
 
778
 
779
+ def collect(title, url="", published_at=""):
780
+ """Add a raw candidate after only dedup + piracy checks."""
781
+ title = str(title or "").strip()
782
+ if not title or len(raw_candidates) >= 40:
783
  return
784
+ if _NOISE.search(title):
 
785
  return
786
+ url = str(url or "").strip()
787
+ if url and _bad_url(url):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
788
  return
789
+ norm = _norm_title(title)
790
+ if norm in seen_titles:
 
 
 
 
 
 
791
  return
792
+ if url and url in seen_urls:
793
+ return
794
+ seen_titles.add(norm)
795
+ if url:
796
+ seen_urls.add(url)
797
+ raw_candidates.append({
798
+ "title": title,
799
+ "url": url,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
800
  "published_at": str(published_at or "").strip(),
801
  })
802
 
803
+ # ── Source 1: NewsAPI ────────────────────────────────────────────
804
  if self.news_api:
805
  try:
806
  response = self.news_api.get_everything(
807
+ q=ticker_symbol,
808
  language="en",
809
  sort_by="publishedAt",
810
+ page_size=30,
811
  )
812
+ for article in (response.get("articles") or []):
813
+ if not isinstance(article, dict):
814
+ continue
815
+ collect(
816
+ title=article.get("title", ""),
817
+ url=article.get("url", ""),
818
+ published_at=article.get("publishedAt", ""),
819
+ )
820
+ except Exception as _e:
821
+ print(f"[NEWS] NewsAPI error: {_e}")
 
 
 
 
 
 
 
 
822
 
823
+ # ── Source 2: Webz.io ────────────────────────────────────────────
824
+ if self.webz_api_key and len(raw_candidates) < 30:
825
  try:
826
  import urllib.request as _urlreq, urllib.parse as _urlparse
827
  _params = _urlparse.urlencode({
828
  "token": self.webz_api_key,
829
+ "q": f'"{ticker_symbol}" language:english',
830
  "sort": "published",
831
  "order": "desc",
832
+ "size": 25,
833
  "format": "json",
834
  })
835
+ _req = _urlreq.Request(
836
+ f"https://api.webz.io/newsApiLite?{_params}",
837
+ headers={"Accept": "application/json"},
838
+ )
839
  with _urlreq.urlopen(_req, timeout=8) as _resp:
840
+ _data = json.loads(_resp.read().decode("utf-8"))
841
+ for post in (_data.get("posts") or []):
842
+ if not isinstance(post, dict):
843
  continue
844
+ collect(
845
+ title=post.get("title", ""),
846
+ url=post.get("url", ""),
847
+ published_at=post.get("published", ""),
 
848
  )
849
+ except Exception as _e:
850
+ print(f"[NEWS] Webz.io error: {_e}")
 
 
851
 
852
+ # ── Source 3: yfinance fallback ──────────────────────────────────
853
+ if not raw_candidates:
854
  try:
855
  stock = yf.Ticker(ticker)
856
+ for item in (stock.news or [])[:40]:
857
+ if not isinstance(item, dict):
858
+ continue
859
+ content = item.get("content") or {}
860
+ if not isinstance(content, dict):
861
+ content = {}
862
+ title = (item.get("title") or content.get("title") or "")
863
+ url = (item.get("link") or item.get("url") or
864
+ content.get("link") or content.get("url") or "")
865
+ pub = (item.get("providerPublishTime") or
866
+ content.get("pubDate", ""))
867
+ collect(title=title, url=url, published_at=pub)
868
+ except Exception as _e:
869
+ print(f"[NEWS] yfinance error: {_e}")
870
+
871
+ # ── Source 4: simulation fallback ────────────────────────────────
872
+ if not raw_candidates:
873
+ _sim = {
874
+ "TSLA": [
 
 
 
 
 
 
 
875
  {"title": "Tesla recalls 2 million vehicles due to autopilot risk", "url": ""},
876
  {"title": "Analysts downgrade Tesla stock amid slowing EV demand", "url": ""},
877
+ ],
878
+ "NVDA": [
879
+ {"title": "Nvidia announces breakthrough AI chip", "url": ""},
880
+ {"title": "Nvidia quarterly revenue beats expectations by 20%", "url": ""},
881
+ ],
882
+ }
883
+ for entry in _sim.get(ticker_symbol, [
884
+ {"title": f"{ticker_symbol} announces date for shareholder meeting", "url": ""},
885
+ {"title": f"{ticker_symbol} news flow active amid market volatility", "url": ""},
886
+ ]):
887
+ collect(title=entry["title"], url=entry.get("url", ""))
888
+
889
+ print(f"[NEWS] {ticker_symbol}: {len(raw_candidates)} raw candidates collected.")
890
+
891
+ # ── Phase 2: LLM filter ──────────────────────────────────────────
892
+ headlines = llm_filter_headlines(
893
+ ticker_symbol,
894
+ raw_candidates,
895
+ max_keep=12,
896
+ )
897
+ print(f"[NEWS] {ticker_symbol}: {len(headlines)} headlines after LLM filter.")
898
 
899
+ # Ensure every headline has a category key for the frontend tag logic
900
+ for h in headlines:
901
+ if "category" not in h:
902
+ h["category"] = "financial"
903
+
904
+ # ── Sentiment scoring ────────────────────────────────────────────
905
+ total_score = 0.0
906
+ valid_count = 0
907
  if self.sentiment_analyzer and headlines:
908
+ for h in headlines:
909
+ title_text = str(h.get("title", "")).strip()
910
+ if not title_text:
911
+ continue
912
  try:
 
 
 
 
913
  result = self.sentiment_analyzer(title_text)[0]
914
+ label = result["label"]
915
+ score = result["score"]
916
+ if label == "positive":
 
917
  total_score += score
918
+ valid_count += 1
919
+ elif label == "negative":
920
  total_score -= score
921
+ valid_count += 1
922
+ else:
923
+ valid_count += 1
 
924
  except Exception:
925
  pass
926
+
927
+ avg_score = total_score / valid_count if valid_count > 0 else 0.0
928
  return avg_score, headlines
929
 
930
  def predict_trend(self, data, sentiment_score):
run_daily.py CHANGED
@@ -8,6 +8,10 @@ Usage examples:
8
  python run_daily.py # daemon loop, runs at 09:00 ET
9
  python run_daily.py --install-task # task checks every 5 min and runs at 09:00 ET
10
  python run_daily.py --uninstall-task
 
 
 
 
11
  """
12
  import argparse
13
  from datetime import datetime, timedelta
 
8
  python run_daily.py # daemon loop, runs at 09:00 ET
9
  python run_daily.py --install-task # task checks every 5 min and runs at 09:00 ET
10
  python run_daily.py --uninstall-task
11
+
12
+ Reminder: check live demo feedback logs at
13
+ https://brajmovech-iris-ai-demo.hf.space/api/admin/feedback
14
+
15
  """
16
  import argparse
17
  from datetime import datetime, timedelta