#!/usr/bin/env python3
"""
CLI script to ingest earnings-call transcripts into ChromaDB.
Usage:
python scripts/ingest_earnings_calls.py --tickers AAPL MSFT --quarters Q4-2024 Q1-2025
python scripts/ingest_earnings_calls.py --tickers TSLA --quarters Q1-2025
Data sources (tried in order):
1. Financial Modeling Prep (FMP) (free tier, 250 req/day)
2. SEC EDGAR 8-K filings (free, always available)
"""
import argparse
import os
import sys
from dotenv import load_dotenv
# Load variables from a local .env file into the process environment before
# anything else runs; main() falls back to the FMP_API_KEY environment
# variable when the settings object has no key.
# NOTE(review): presumably core.config also reads the environment -- confirm.
load_dotenv()
# Ensure project root is on sys.path so `core.*` imports work.
# The root is taken to be the parent of this script's directory, and it is
# inserted at position 0 so it takes precedence over other sys.path entries.
# This must stay above the `core.*` imports below.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from core.config import Settings
from core.earnings_tools import ingest_earnings_call, parse_quarter
def main():
    """Run the earnings-call ingestion CLI.

    Reads ``--tickers`` and ``--quarters`` from the command line, builds a
    ``Settings`` object, makes sure the Chroma storage directory exists, and
    then calls ``ingest_earnings_call`` once per (ticker, quarter) pair.

    Exit codes:
        1 -- a quarter string was rejected by ``parse_quarter`` (nothing is
             ingested), or at least one ingest raised an exception.
        0 -- otherwise, including when some results have status ``"failed"``.
    """
    cli = argparse.ArgumentParser(
        description="Ingest earnings-call transcripts into ChromaDB."
    )
    cli.add_argument(
        "--tickers",
        nargs="+",
        required=True,
        help="Stock tickers to ingest (e.g. --tickers AAPL MSFT)",
    )
    cli.add_argument(
        "--quarters",
        nargs="+",
        required=True,
        help="Quarters to ingest, format Q<N>-<YYYY> (e.g. --quarters Q4-2024 Q1-2025)",
    )
    opts = cli.parse_args()

    config = Settings()
    # The settings value wins; the environment variable is only a fallback.
    fmp_key = config.fmp_api_key or os.getenv("FMP_API_KEY", "")
    store_dir = config.earnings_chroma_path
    os.makedirs(store_dir, exist_ok=True)

    # Validate every quarter before ingesting anything, so a malformed
    # argument aborts the run up front.
    periods: list[tuple[int, int]] = []
    for raw_quarter in opts.quarters:
        try:
            qtr, yr = parse_quarter(raw_quarter)
        except ValueError as exc:
            print(f"[Error] {exc}")
            sys.exit(1)
        periods.append((qtr, yr))

    rule = "=" * 50
    outcomes: list[dict] = []
    for symbol in (raw_ticker.upper() for raw_ticker in opts.tickers):
        for qtr, yr in periods:
            print(f"\n{rule}")
            print(f"Ingesting {symbol} Q{qtr}-{yr}")
            print(f"{rule}")
            try:
                outcome = ingest_earnings_call(
                    ticker=symbol,
                    quarter=qtr,
                    year=yr,
                    api_key=fmp_key,
                    chroma_path=store_dir,
                )
            except Exception as exc:
                # One broken ingest must not stop the remaining ones; it is
                # recorded and reflected in the exit code at the end.
                print(f"[Error] Failed to ingest {symbol} Q{qtr}-{yr}: {exc}")
                outcome = "error"
            outcomes.append(
                {"ticker": symbol, "quarter": f"Q{qtr}-{yr}", "status": outcome}
            )

    # Summary
    status_icons = {
        "success": "✅",
        "partial": "🟡",
        "failed": "❌",
        "exists": "⏭️",
        "error": "💥",
    }
    print(f"\n{rule}")
    print("INGEST SUMMARY")
    print(f"{rule}")
    for entry in outcomes:
        icon = status_icons.get(entry["status"], "❓")
        print(f" {icon} {entry['ticker']} {entry['quarter']}: {entry['status']}")

    error_count = sum(1 for entry in outcomes if entry["status"] == "error")
    failed_count = sum(1 for entry in outcomes if entry["status"] == "failed")

    if error_count:
        print(f"\n[CRITICAL] {error_count} ingest(s) hit technical errors. Check logs.")
        sys.exit(1)

    if failed_count:
        # A "failed" status is reported but does not change the exit code.
        print(f"\n[INFO] {failed_count} transcript(s) could not be found (likely not yet reported).")
        print("This is not treated as a build failure.")

    print("\nIngestion process completed successfully.")
    sys.exit(0)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|