Spaces:
Sleeping
Sleeping
fix: Update analyzer to handle new MCP source-keyed structure
Browse files
- Add _extract_valuation_metric() for temporal wrappers with as_of
- Update valuation extraction to use new structure
- Update volatility extraction for fred/yahoo_finance sources
- Update macro extraction for bea/bls/fred sources
- Update news extraction for tavily/nyt/newsapi arrays
- Update sentiment extraction for finnhub/reddit arrays
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
- src/nodes/analyzer.py +127 -140
src/nodes/analyzer.py
CHANGED
|
@@ -159,12 +159,11 @@ def _extract_company_profile(raw_data: str) -> dict:
|
|
| 159 |
profile = {}
|
| 160 |
|
| 161 |
# Try SEC EDGAR for business address (most authoritative)
|
| 162 |
-
# Handle both
|
| 163 |
fin_all = multi_source.get("fundamentals_all", {})
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
else
|
| 167 |
-
sec_data = fin_all.get("sec_edgar", {}).get("data", {})
|
| 168 |
sec_profile = sec_data.get("company_info", {}) or sec_data.get("profile", {})
|
| 169 |
|
| 170 |
if sec_profile:
|
|
@@ -178,15 +177,14 @@ def _extract_company_profile(raw_data: str) -> dict:
|
|
| 178 |
profile["sic_description"] = sec_profile.get("sicDescription", "")
|
| 179 |
|
| 180 |
# Try Yahoo Finance for sector/industry and other details
|
| 181 |
-
|
|
|
|
| 182 |
yf_profile = yf_val.get("profile", {})
|
| 183 |
|
| 184 |
if not yf_profile:
|
| 185 |
-
#
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
else:
|
| 189 |
-
yf_fund = fin_all.get("yahoo_finance", {}).get("data", {})
|
| 190 |
yf_profile = yf_fund.get("profile", {})
|
| 191 |
|
| 192 |
if yf_profile:
|
|
@@ -229,6 +227,16 @@ def _extract_temporal_metric(metric_data: dict) -> dict:
|
|
| 229 |
}
|
| 230 |
|
| 231 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 232 |
def _get_fiscal_period_label(metric: dict) -> str:
|
| 233 |
"""Format fiscal period label from temporal data (e.g., 'FY 2023' or 'Q3 2024')."""
|
| 234 |
if not isinstance(metric, dict):
|
|
@@ -346,13 +354,11 @@ def _generate_data_report(raw_data: str, is_financial: bool = False) -> str:
|
|
| 346 |
|
| 347 |
# ========== FINANCIALS ==========
|
| 348 |
fin_all = multi_source.get("fundamentals_all", {})
|
| 349 |
-
# Handle both
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
else
|
| 354 |
-
sec_data = fin_all.get("sec_edgar", {}).get("data", {})
|
| 355 |
-
yf_data = fin_all.get("yahoo_finance", {}).get("data", {})
|
| 356 |
|
| 357 |
if sec_data or yf_data:
|
| 358 |
lines.append("## Financials")
|
|
@@ -393,8 +399,10 @@ def _generate_data_report(raw_data: str, is_financial: bool = False) -> str:
|
|
| 393 |
|
| 394 |
# ========== VALUATION ==========
|
| 395 |
val_all = multi_source.get("valuation_all", {})
|
| 396 |
-
|
| 397 |
-
|
|
|
|
|
|
|
| 398 |
|
| 399 |
if yf_val or av_val:
|
| 400 |
lines.append("## Valuation")
|
|
@@ -443,8 +451,10 @@ def _generate_data_report(raw_data: str, is_financial: bool = False) -> str:
|
|
| 443 |
ctx = vol_all.get("market_volatility_context", {})
|
| 444 |
vix = ctx.get("vix", {})
|
| 445 |
vxn = ctx.get("vxn", {})
|
| 446 |
-
|
| 447 |
-
|
|
|
|
|
|
|
| 448 |
|
| 449 |
# VIX
|
| 450 |
if vix.get("value"):
|
|
@@ -482,8 +492,10 @@ def _generate_data_report(raw_data: str, is_financial: bool = False) -> str:
|
|
| 482 |
lines.append("| Metric | Period | BEA/BLS | FRED |")
|
| 483 |
lines.append("|--------|--------|---------|------|")
|
| 484 |
|
| 485 |
-
|
| 486 |
-
|
|
|
|
|
|
|
| 487 |
|
| 488 |
# GDP Growth
|
| 489 |
gdp_p = bea_bls.get("gdp_growth", {}) or {}
|
|
@@ -511,83 +523,62 @@ def _generate_data_report(raw_data: str, is_financial: bool = False) -> str:
|
|
| 511 |
|
| 512 |
# ========== NEWS ==========
|
| 513 |
news = metrics.get("news", {})
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
|
| 522 |
-
|
| 523 |
-
|
| 524 |
-
|
| 525 |
-
|
| 526 |
-
|
| 527 |
-
|
| 528 |
-
|
| 529 |
-
|
| 530 |
-
|
|
|
|
| 531 |
|
| 532 |
# ========== SENTIMENT ==========
|
| 533 |
sentiment = metrics.get("sentiment", {})
|
| 534 |
if sentiment:
|
| 535 |
-
|
| 536 |
-
|
| 537 |
-
|
| 538 |
-
# Try both old format (finnhub_sentiment) and new format (metrics.finnhub)
|
| 539 |
-
finnhub = sentiment.get("finnhub_sentiment", {}) or sentiment.get("metrics", {}).get("finnhub", {})
|
| 540 |
-
reddit = sentiment.get("reddit_sentiment", {}) or sentiment.get("metrics", {}).get("reddit", {})
|
| 541 |
-
|
| 542 |
-
finn_articles = finnhub.get("articles", [])
|
| 543 |
-
finn_score = finnhub.get("score", finnhub.get("composite_score", "N/A"))
|
| 544 |
-
finn_count = finnhub.get("articles_analyzed", len(finn_articles))
|
| 545 |
-
|
| 546 |
-
reddit_posts = reddit.get("posts", [])
|
| 547 |
-
reddit_score = reddit.get("score", reddit.get("composite_score", "N/A"))
|
| 548 |
-
reddit_count = reddit.get("posts_analyzed", len(reddit_posts))
|
| 549 |
|
| 550 |
lines.append("## Sentiment Analysis")
|
| 551 |
-
lines.append(f"Composite Score: {composite_score}/100 - {interpretation}")
|
| 552 |
lines.append("")
|
| 553 |
-
lines.append("| Source |
|
| 554 |
-
lines.append("|--------|-------|
|
| 555 |
-
lines.append(f"| Finnhub | {
|
| 556 |
-
lines.append(f"| Reddit | {
|
| 557 |
lines.append("")
|
| 558 |
|
| 559 |
-
# Show
|
| 560 |
-
if
|
| 561 |
lines.append("### Finnhub Articles")
|
| 562 |
lines.append("")
|
| 563 |
-
lines.append("| # |
|
| 564 |
-
lines.append("|---|-------
|
| 565 |
-
for i, article in enumerate(
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
sent = f"{sent:+.2f}"
|
| 570 |
-
url = article.get("url", article.get("link", ""))
|
| 571 |
-
lines.append(f"| {i} | {headline} | {sent} | {url} |")
|
| 572 |
lines.append("")
|
| 573 |
|
| 574 |
-
# Show Reddit posts
|
| 575 |
if reddit_posts:
|
| 576 |
lines.append("### Reddit Posts")
|
| 577 |
lines.append("")
|
| 578 |
-
lines.append("| # | Title |
|
| 579 |
-
lines.append("|---|-------|-----
|
| 580 |
for i, post in enumerate(reddit_posts[:10], 1):
|
| 581 |
title = post.get("title", "Untitled")
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
sent = post.get("sentiment_score", post.get("sentiment", "N/A"))
|
| 585 |
-
if isinstance(sent, (int, float)):
|
| 586 |
-
sent = f"{sent:+.2f}"
|
| 587 |
-
url = post.get("url", post.get("permalink", ""))
|
| 588 |
-
if url and not url.startswith("http"):
|
| 589 |
-
url = f"https://reddit.com{url}"
|
| 590 |
-
lines.append(f"| {i} | {title} | {subreddit} | {upvotes} | {sent} | {url} |")
|
| 591 |
lines.append("")
|
| 592 |
|
| 593 |
lines.append("---")
|
|
@@ -621,20 +612,18 @@ def _extract_key_metrics(raw_data: str) -> dict:
|
|
| 621 |
|
| 622 |
# Extract fundamentals with temporal data
|
| 623 |
# Structure varies:
|
|
|
|
| 624 |
# - Old: {"sec_edgar": {"data": {...}}, "yahoo_finance": {"data": {...}}}
|
| 625 |
-
# - New
|
| 626 |
fin = metrics.get("fundamentals", {})
|
| 627 |
if not fin or "error" in fin:
|
| 628 |
fin = data.get("multi_source", {}).get("fundamentals_all", {})
|
| 629 |
if fin and "error" not in fin:
|
| 630 |
-
# Handle both
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
else:
|
| 636 |
-
sec_data = fin.get("sec_edgar", {}).get("data", {})
|
| 637 |
-
yf_data = fin.get("yahoo_finance", {}).get("data", {})
|
| 638 |
# Merge with SEC as primary
|
| 639 |
fin_data = {**yf_data, **sec_data} # SEC overwrites YF where both exist
|
| 640 |
extracted["fundamentals"] = {
|
|
@@ -650,98 +639,96 @@ def _extract_key_metrics(raw_data: str) -> dict:
|
|
| 650 |
}
|
| 651 |
|
| 652 |
# Extract valuation (with temporal data)
|
| 653 |
-
#
|
| 654 |
val = metrics.get("valuation", {})
|
| 655 |
if not val or "error" in val:
|
| 656 |
val = data.get("multi_source", {}).get("valuation_all", {})
|
| 657 |
if val and "error" not in val:
|
| 658 |
-
|
| 659 |
-
|
| 660 |
extracted["valuation"] = {
|
| 661 |
-
"pe_trailing":
|
| 662 |
-
"pe_forward":
|
| 663 |
-
"pb_ratio":
|
| 664 |
-
"ps_ratio":
|
| 665 |
-
"ev_ebitda":
|
| 666 |
"valuation_signal": val.get("overall_signal"),
|
| 667 |
-
"as_of": val_date,
|
| 668 |
}
|
| 669 |
|
| 670 |
# Extract volatility (with temporal data)
|
| 671 |
-
#
|
| 672 |
vol = metrics.get("volatility", {})
|
| 673 |
if not vol or "error" in vol:
|
| 674 |
vol = data.get("multi_source", {}).get("volatility_all", {})
|
| 675 |
if vol and "error" not in vol:
|
| 676 |
-
|
| 677 |
-
|
| 678 |
-
|
| 679 |
-
|
| 680 |
-
|
| 681 |
-
|
|
|
|
| 682 |
extracted["volatility"] = {
|
| 683 |
-
"beta":
|
| 684 |
-
|
| 685 |
-
"
|
| 686 |
-
"end_date": vix_data.get("date") or vol_date if isinstance(vix_data, dict) else vol_date},
|
| 687 |
-
"historical_volatility": {"value": hv_data.get("value") if isinstance(hv_data, dict) else hv_data,
|
| 688 |
-
"end_date": hv_data.get("date") or vol_date if isinstance(hv_data, dict) else vol_date},
|
| 689 |
-
"as_of": vol_date,
|
| 690 |
}
|
| 691 |
|
| 692 |
# Extract macro (with temporal data)
|
| 693 |
-
#
|
| 694 |
macro = metrics.get("macro", {})
|
| 695 |
if not macro or "error" in macro:
|
| 696 |
macro = data.get("multi_source", {}).get("macro_all", {})
|
| 697 |
if macro and "error" not in macro:
|
| 698 |
-
|
| 699 |
-
|
| 700 |
-
|
| 701 |
-
|
| 702 |
-
|
| 703 |
-
|
| 704 |
-
|
| 705 |
-
|
|
|
|
|
|
|
| 706 |
extracted["macro"] = {
|
| 707 |
-
"gdp_growth":
|
| 708 |
-
|
| 709 |
-
"
|
| 710 |
-
|
| 711 |
-
"inflation": {"value": inflation.get("value") if isinstance(inflation, dict) else inflation,
|
| 712 |
-
"end_date": inflation.get("date") or inflation.get("period") if isinstance(inflation, dict) else None},
|
| 713 |
-
"unemployment": {"value": unemp.get("value") if isinstance(unemp, dict) else unemp,
|
| 714 |
-
"end_date": unemp.get("date") or unemp.get("period") if isinstance(unemp, dict) else None},
|
| 715 |
}
|
| 716 |
|
| 717 |
# Extract news with VADER sentiment
|
|
|
|
| 718 |
news = metrics.get("news", {})
|
| 719 |
if news and "error" not in news:
|
| 720 |
-
|
| 721 |
-
|
|
|
|
|
|
|
|
|
|
| 722 |
|
| 723 |
# Compute VADER sentiment on headlines
|
| 724 |
vader_news = _compute_vader_sentiment(headlines)
|
| 725 |
|
| 726 |
extracted["news"] = {
|
| 727 |
-
"article_count": len(
|
| 728 |
-
"headlines": [a.get("title", "")[:100] for a in
|
| 729 |
"vader_sentiment": vader_news,
|
| 730 |
}
|
| 731 |
|
| 732 |
# Extract sentiment with VADER on reddit posts
|
|
|
|
| 733 |
sent = metrics.get("sentiment", {})
|
| 734 |
if sent and "error" not in sent:
|
| 735 |
-
|
| 736 |
-
reddit_posts = sent.get("reddit_posts", [])
|
| 737 |
reddit_titles = [p.get("title", "") for p in reddit_posts if p.get("title")]
|
| 738 |
|
| 739 |
# Compute VADER sentiment on reddit titles
|
| 740 |
vader_reddit = _compute_vader_sentiment(reddit_titles)
|
| 741 |
|
| 742 |
extracted["sentiment"] = {
|
| 743 |
-
"
|
| 744 |
-
"
|
| 745 |
"vader_reddit": vader_reddit,
|
| 746 |
}
|
| 747 |
|
|
|
|
| 159 |
profile = {}
|
| 160 |
|
| 161 |
# Try SEC EDGAR for business address (most authoritative)
|
| 162 |
+
# Handle both old format (with "data" wrapper) and new flat format
|
| 163 |
fin_all = multi_source.get("fundamentals_all", {})
|
| 164 |
+
sec_source = fin_all.get("sec_edgar", {})
|
| 165 |
+
# Check if old format with "data" wrapper or new flat format
|
| 166 |
+
sec_data = sec_source.get("data", sec_source) if "data" in sec_source else sec_source
|
|
|
|
| 167 |
sec_profile = sec_data.get("company_info", {}) or sec_data.get("profile", {})
|
| 168 |
|
| 169 |
if sec_profile:
|
|
|
|
| 177 |
profile["sic_description"] = sec_profile.get("sicDescription", "")
|
| 178 |
|
| 179 |
# Try Yahoo Finance for sector/industry and other details
|
| 180 |
+
yf_val_source = multi_source.get("valuation_all", {}).get("yahoo_finance", {})
|
| 181 |
+
yf_val = yf_val_source.get("data", yf_val_source) if "data" in yf_val_source else yf_val_source
|
| 182 |
yf_profile = yf_val.get("profile", {})
|
| 183 |
|
| 184 |
if not yf_profile:
|
| 185 |
+
# Try fundamentals yahoo_finance
|
| 186 |
+
yf_fund_source = fin_all.get("yahoo_finance", {})
|
| 187 |
+
yf_fund = yf_fund_source.get("data", yf_fund_source) if "data" in yf_fund_source else yf_fund_source
|
|
|
|
|
|
|
| 188 |
yf_profile = yf_fund.get("profile", {})
|
| 189 |
|
| 190 |
if yf_profile:
|
|
|
|
| 227 |
}
|
| 228 |
|
| 229 |
|
| 230 |
+
def _extract_valuation_metric(metric_data: dict) -> dict:
|
| 231 |
+
"""Extract valuation metric with as_of date (new MCP structure)."""
|
| 232 |
+
if not isinstance(metric_data, dict):
|
| 233 |
+
return {"value": metric_data}
|
| 234 |
+
return {
|
| 235 |
+
"value": metric_data.get("value"),
|
| 236 |
+
"end_date": metric_data.get("as_of"), # MCP uses "as_of" for valuation
|
| 237 |
+
}
|
| 238 |
+
|
| 239 |
+
|
| 240 |
def _get_fiscal_period_label(metric: dict) -> str:
|
| 241 |
"""Format fiscal period label from temporal data (e.g., 'FY 2023' or 'Q3 2024')."""
|
| 242 |
if not isinstance(metric, dict):
|
|
|
|
| 354 |
|
| 355 |
# ========== FINANCIALS ==========
|
| 356 |
fin_all = multi_source.get("fundamentals_all", {})
|
| 357 |
+
# Handle both old format (with "data" wrapper) and new flat format
|
| 358 |
+
sec_source = fin_all.get("sec_edgar", {})
|
| 359 |
+
sec_data = sec_source.get("data", sec_source) if "data" in sec_source else sec_source
|
| 360 |
+
yf_source = fin_all.get("yahoo_finance", {})
|
| 361 |
+
yf_data = yf_source.get("data", yf_source) if "data" in yf_source else yf_source
|
|
|
|
|
|
|
| 362 |
|
| 363 |
if sec_data or yf_data:
|
| 364 |
lines.append("## Financials")
|
|
|
|
| 399 |
|
| 400 |
# ========== VALUATION ==========
|
| 401 |
val_all = multi_source.get("valuation_all", {})
|
| 402 |
+
yf_val_src = val_all.get("yahoo_finance", {})
|
| 403 |
+
yf_val = yf_val_src.get("data", yf_val_src) if "data" in yf_val_src else yf_val_src
|
| 404 |
+
av_val_src = val_all.get("alpha_vantage", {})
|
| 405 |
+
av_val = av_val_src.get("data", av_val_src) if "data" in av_val_src else av_val_src
|
| 406 |
|
| 407 |
if yf_val or av_val:
|
| 408 |
lines.append("## Valuation")
|
|
|
|
| 451 |
ctx = vol_all.get("market_volatility_context", {})
|
| 452 |
vix = ctx.get("vix", {})
|
| 453 |
vxn = ctx.get("vxn", {})
|
| 454 |
+
yf_vol_src = vol_all.get("yahoo_finance", {})
|
| 455 |
+
yf_vol = yf_vol_src.get("data", yf_vol_src) if "data" in yf_vol_src else yf_vol_src
|
| 456 |
+
av_vol_src = vol_all.get("alpha_vantage", {})
|
| 457 |
+
av_vol = av_vol_src.get("data", av_vol_src) if "data" in av_vol_src else av_vol_src
|
| 458 |
|
| 459 |
# VIX
|
| 460 |
if vix.get("value"):
|
|
|
|
| 492 |
lines.append("| Metric | Period | BEA/BLS | FRED |")
|
| 493 |
lines.append("|--------|--------|---------|------|")
|
| 494 |
|
| 495 |
+
bea_src = macro_all.get("bea_bls", {})
|
| 496 |
+
bea_bls = bea_src.get("data", bea_src) if "data" in bea_src else bea_src
|
| 497 |
+
fred_src = macro_all.get("fred", {})
|
| 498 |
+
fred = fred_src.get("data", fred_src) if "data" in fred_src else fred_src
|
| 499 |
|
| 500 |
# GDP Growth
|
| 501 |
gdp_p = bea_bls.get("gdp_growth", {}) or {}
|
|
|
|
| 523 |
|
| 524 |
# ========== NEWS ==========
|
| 525 |
news = metrics.get("news", {})
|
| 526 |
+
if news:
|
| 527 |
+
# New format: {tavily: [...], nyt: [...], newsapi: [...]}
|
| 528 |
+
all_articles = []
|
| 529 |
+
for source in ["tavily", "nyt", "newsapi"]:
|
| 530 |
+
for article in news.get(source, []):
|
| 531 |
+
all_articles.append({**article, "source": source})
|
| 532 |
+
|
| 533 |
+
if all_articles:
|
| 534 |
+
lines.append("## News Articles")
|
| 535 |
+
lines.append("")
|
| 536 |
+
lines.append("| # | Title | Source | URL |")
|
| 537 |
+
lines.append("|---|-------|--------|-----|")
|
| 538 |
+
for i, article in enumerate(all_articles[:10], 1):
|
| 539 |
+
title = article.get("title", "Untitled")
|
| 540 |
+
source = article.get("source", "Unknown")
|
| 541 |
+
url = article.get("url", "")
|
| 542 |
+
lines.append(f"| {i} | {title} | {source} | {url} |")
|
| 543 |
+
lines.append("")
|
| 544 |
|
| 545 |
# ========== SENTIMENT ==========
|
| 546 |
sentiment = metrics.get("sentiment", {})
|
| 547 |
if sentiment:
|
| 548 |
+
# New format: {finnhub: [...], reddit: [...]}
|
| 549 |
+
finnhub_articles = sentiment.get("finnhub", [])
|
| 550 |
+
reddit_posts = sentiment.get("reddit", [])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 551 |
|
| 552 |
lines.append("## Sentiment Analysis")
|
|
|
|
| 553 |
lines.append("")
|
| 554 |
+
lines.append("| Source | Items |")
|
| 555 |
+
lines.append("|--------|-------|")
|
| 556 |
+
lines.append(f"| Finnhub | {len(finnhub_articles)} articles |")
|
| 557 |
+
lines.append(f"| Reddit | {len(reddit_posts)} posts |")
|
| 558 |
lines.append("")
|
| 559 |
|
| 560 |
+
# Show Finnhub articles
|
| 561 |
+
if finnhub_articles:
|
| 562 |
lines.append("### Finnhub Articles")
|
| 563 |
lines.append("")
|
| 564 |
+
lines.append("| # | Title | URL |")
|
| 565 |
+
lines.append("|---|-------|-----|")
|
| 566 |
+
for i, article in enumerate(finnhub_articles[:10], 1):
|
| 567 |
+
title = article.get("title", "Untitled")
|
| 568 |
+
url = article.get("url", "")
|
| 569 |
+
lines.append(f"| {i} | {title} | {url} |")
|
|
|
|
|
|
|
|
|
|
| 570 |
lines.append("")
|
| 571 |
|
| 572 |
+
# Show Reddit posts
|
| 573 |
if reddit_posts:
|
| 574 |
lines.append("### Reddit Posts")
|
| 575 |
lines.append("")
|
| 576 |
+
lines.append("| # | Title | URL |")
|
| 577 |
+
lines.append("|---|-------|-----|")
|
| 578 |
for i, post in enumerate(reddit_posts[:10], 1):
|
| 579 |
title = post.get("title", "Untitled")
|
| 580 |
+
url = post.get("url", "")
|
| 581 |
+
lines.append(f"| {i} | {title} | {url} |")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 582 |
lines.append("")
|
| 583 |
|
| 584 |
lines.append("---")
|
|
|
|
| 612 |
|
| 613 |
# Extract fundamentals with temporal data
|
| 614 |
# Structure varies:
|
| 615 |
+
# Formats supported:
|
| 616 |
# - Old: {"sec_edgar": {"data": {...}}, "yahoo_finance": {"data": {...}}}
|
| 617 |
+
# - New (flat): {"sec_edgar": {...}, "yahoo_finance": {...}}
|
| 618 |
fin = metrics.get("fundamentals", {})
|
| 619 |
if not fin or "error" in fin:
|
| 620 |
fin = data.get("multi_source", {}).get("fundamentals_all", {})
|
| 621 |
if fin and "error" not in fin:
|
| 622 |
+
# Handle both old format (with "data" wrapper) and new flat format
|
| 623 |
+
sec_source = fin.get("sec_edgar", {})
|
| 624 |
+
sec_data = sec_source.get("data", sec_source) if "data" in sec_source else sec_source
|
| 625 |
+
yf_source = fin.get("yahoo_finance", {})
|
| 626 |
+
yf_data = yf_source.get("data", yf_source) if "data" in yf_source else yf_source
|
|
|
|
|
|
|
|
|
|
| 627 |
# Merge with SEC as primary
|
| 628 |
fin_data = {**yf_data, **sec_data} # SEC overwrites YF where both exist
|
| 629 |
extracted["fundamentals"] = {
|
|
|
|
| 639 |
}
|
| 640 |
|
| 641 |
# Extract valuation (with temporal data)
|
| 642 |
+
# Handle both old format (with "data" wrapper) and new flat format
|
| 643 |
val = metrics.get("valuation", {})
|
| 644 |
if not val or "error" in val:
|
| 645 |
val = data.get("multi_source", {}).get("valuation_all", {})
|
| 646 |
if val and "error" not in val:
|
| 647 |
+
yf_source = val.get("yahoo_finance", {})
|
| 648 |
+
yf_val = yf_source.get("data", yf_source) if "data" in yf_source else yf_source
|
| 649 |
extracted["valuation"] = {
|
| 650 |
+
"pe_trailing": _extract_valuation_metric(yf_val.get("trailing_pe", {})),
|
| 651 |
+
"pe_forward": _extract_valuation_metric(yf_val.get("forward_pe", {})),
|
| 652 |
+
"pb_ratio": _extract_valuation_metric(yf_val.get("price_to_book", {})),
|
| 653 |
+
"ps_ratio": _extract_valuation_metric(yf_val.get("price_to_sales", {})),
|
| 654 |
+
"ev_ebitda": _extract_valuation_metric(yf_val.get("ev_ebitda", {})),
|
| 655 |
"valuation_signal": val.get("overall_signal"),
|
|
|
|
| 656 |
}
|
| 657 |
|
| 658 |
# Extract volatility (with temporal data)
|
| 659 |
+
# New structure: {fred: {vix: {...}}, yahoo_finance: {beta: {...}}}
|
| 660 |
vol = metrics.get("volatility", {})
|
| 661 |
if not vol or "error" in vol:
|
| 662 |
vol = data.get("multi_source", {}).get("volatility_all", {})
|
| 663 |
if vol and "error" not in vol:
|
| 664 |
+
# Yahoo Finance data (beta, historical volatility)
|
| 665 |
+
yf_vol_source = vol.get("yahoo_finance", {})
|
| 666 |
+
yf_vol = yf_vol_source.get("data", yf_vol_source) if "data" in yf_vol_source else yf_vol_source
|
| 667 |
+
# FRED data (VIX)
|
| 668 |
+
fred_source = vol.get("fred", {})
|
| 669 |
+
fred_vol = fred_source.get("data", fred_source) if "data" in fred_source else fred_source
|
| 670 |
+
|
| 671 |
extracted["volatility"] = {
|
| 672 |
+
"beta": _extract_valuation_metric(yf_vol.get("beta", {})),
|
| 673 |
+
"vix": _extract_valuation_metric(fred_vol.get("vix", {})),
|
| 674 |
+
"historical_volatility": _extract_valuation_metric(yf_vol.get("historical_volatility", {})),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 675 |
}
|
| 676 |
|
| 677 |
# Extract macro (with temporal data)
|
| 678 |
+
# New structure: {bea: {gdp_growth: {...}}, bls: {unemployment_rate: {...}}, fred: {fed_funds_rate: {...}}}
|
| 679 |
macro = metrics.get("macro", {})
|
| 680 |
if not macro or "error" in macro:
|
| 681 |
macro = data.get("multi_source", {}).get("macro_all", {})
|
| 682 |
if macro and "error" not in macro:
|
| 683 |
+
# BEA data (GDP)
|
| 684 |
+
bea_source = macro.get("bea", {})
|
| 685 |
+
bea = bea_source.get("data", bea_source) if "data" in bea_source else bea_source
|
| 686 |
+
# BLS data (unemployment, CPI)
|
| 687 |
+
bls_source = macro.get("bls", {})
|
| 688 |
+
bls = bls_source.get("data", bls_source) if "data" in bls_source else bls_source
|
| 689 |
+
# FRED data (interest rates)
|
| 690 |
+
fred_source = macro.get("fred", {})
|
| 691 |
+
fred = fred_source.get("data", fred_source) if "data" in fred_source else fred_source
|
| 692 |
+
|
| 693 |
extracted["macro"] = {
|
| 694 |
+
"gdp_growth": _extract_valuation_metric(bea.get("gdp_growth", {})),
|
| 695 |
+
"interest_rate": _extract_valuation_metric(fred.get("fed_funds_rate", {})),
|
| 696 |
+
"inflation": _extract_valuation_metric(bls.get("cpi_yoy", {})),
|
| 697 |
+
"unemployment": _extract_valuation_metric(bls.get("unemployment_rate", {})),
|
|
|
|
|
|
|
|
|
|
|
|
|
| 698 |
}
|
| 699 |
|
| 700 |
# Extract news with VADER sentiment
|
| 701 |
+
# New format: {tavily: [...], nyt: [...], newsapi: [...]}
|
| 702 |
news = metrics.get("news", {})
|
| 703 |
if news and "error" not in news:
|
| 704 |
+
all_articles = []
|
| 705 |
+
for source in ["tavily", "nyt", "newsapi"]:
|
| 706 |
+
all_articles.extend(news.get(source, []))
|
| 707 |
+
|
| 708 |
+
headlines = [a.get("title", "") for a in all_articles if a.get("title")]
|
| 709 |
|
| 710 |
# Compute VADER sentiment on headlines
|
| 711 |
vader_news = _compute_vader_sentiment(headlines)
|
| 712 |
|
| 713 |
extracted["news"] = {
|
| 714 |
+
"article_count": len(all_articles),
|
| 715 |
+
"headlines": [a.get("title", "")[:100] for a in all_articles[:5]],
|
| 716 |
"vader_sentiment": vader_news,
|
| 717 |
}
|
| 718 |
|
| 719 |
# Extract sentiment with VADER on reddit posts
|
| 720 |
+
# New format: {finnhub: [...], reddit: [...]}
|
| 721 |
sent = metrics.get("sentiment", {})
|
| 722 |
if sent and "error" not in sent:
|
| 723 |
+
reddit_posts = sent.get("reddit", [])
|
|
|
|
| 724 |
reddit_titles = [p.get("title", "") for p in reddit_posts if p.get("title")]
|
| 725 |
|
| 726 |
# Compute VADER sentiment on reddit titles
|
| 727 |
vader_reddit = _compute_vader_sentiment(reddit_titles)
|
| 728 |
|
| 729 |
extracted["sentiment"] = {
|
| 730 |
+
"finnhub_count": len(sent.get("finnhub", [])),
|
| 731 |
+
"reddit_count": len(reddit_posts),
|
| 732 |
"vader_reddit": vader_reddit,
|
| 733 |
}
|
| 734 |
|