"""EDGAR API Client Module""" import requests try: from sec_edgar_api.EdgarClient import EdgarClient except ImportError: EdgarClient = None import json import time class EdgarDataClient: def __init__(self, user_agent="Juntao Peng Financial Report Metrics App (jtyxabc@gmail.com)"): """Initialize EDGAR client""" self.user_agent = user_agent if EdgarClient: self.edgar = EdgarClient(user_agent=user_agent) else: self.edgar = None def search_company_by_name(self, company_name): """Search company CIK by company name""" try: # Use SEC company ticker database url = "https://www.sec.gov/files/company_tickers.json" headers = {"User-Agent": self.user_agent} response = requests.get(url, headers=headers) response.raise_for_status() companies = response.json() # Search for matching company names matches = [] exact_matches = [] for _, company in companies.items(): company_title = company["title"].lower() search_name = company_name.lower() # Exact match if search_name == company_title: exact_matches.append({ "cik": str(company["cik_str"]).zfill(10), "name": company["title"], "ticker": company["ticker"] }) # Partial match elif search_name in company_title or \ search_name in company["ticker"].lower(): matches.append({ "cik": str(company["cik_str"]).zfill(10), "name": company["title"], "ticker": company["ticker"] }) # Return exact match first, then partial match if exact_matches: return exact_matches[0] elif matches: return matches[0] else: return None except Exception as e: print(f"Error searching company: {e}") return None def get_company_info(self, cik): """ Get basic company information Args: cik (str): Company CIK code Returns: dict: Dictionary containing company information """ if not self.edgar: print("sec_edgar_api library not installed") return None try: # Get company submissions submissions = self.edgar.get_submissions(cik=cik) return { "cik": cik, "name": submissions.get("name", ""), "tickers": submissions.get("tickers", []), "sic": submissions.get("sic", ""), "sic_description": submissions.get("sicDescription", "") } except Exception as e: print(f"Error getting company info: {e}") return None def get_company_filings(self, cik, form_types=None): """ Get all company filing documents Args: cik (str): Company CIK code form_types (list): List of form types, e.g., ['10-K', '10-Q'], None for all types Returns: list: List of filing documents """ if not self.edgar: print("sec_edgar_api library not installed") return [] try: # Get company submissions submissions = self.edgar.get_submissions(cik=cik) # Extract filing information filings = [] recent = submissions.get("filings", {}).get("recent", {}) # Get data from each field form_types_list = recent.get("form", []) filing_dates = recent.get("filingDate", []) accession_numbers = recent.get("accessionNumber", []) primary_documents = recent.get("primaryDocument", []) # Iterate through all filings for i in range(len(form_types_list)): form_type = form_types_list[i] # Filter by form type if specified if form_types and form_type not in form_types: continue filing_date = filing_dates[i] if i < len(filing_dates) else "" accession_number = accession_numbers[i] if i < len(accession_numbers) else "" primary_document = primary_documents[i] if i < len(primary_documents) else "" filing = { "form_type": form_type, "filing_date": filing_date, "accession_number": accession_number, "primary_document": primary_document } filings.append(filing) return filings except Exception as e: print(f"Error getting company filings: {e}") return [] def get_company_facts(self, cik): """ Get all company financial facts data Args: cik (str): Company CIK code Returns: dict: Company financial facts data """ if not self.edgar: print("sec_edgar_api library not installed") return {} try: facts = self.edgar.get_company_facts(cik=cik) return facts except Exception as e: print(f"Error getting company facts: {e}") return {} def get_financial_data_for_period(self, cik, period): """ Get financial data for a specific period (supports annual and quarterly) Args: cik (str): Company CIK code period (str): Period in format 'YYYY' or 'YYYYQX' (e.g., '2025' or '2025Q3') Returns: dict: Financial data dictionary """ if not self.edgar: print("sec_edgar_api library not installed") return {} try: # Get company financial facts facts = self.get_company_facts(cik) if not facts: return {} # Extract us-gaap and ifrs-full financial data (20-F may use IFRS) us_gaap = facts.get("facts", {}).get("us-gaap", {}) ifrs_full = facts.get("facts", {}).get("ifrs-full", {}) # Define financial metrics and their XBRL tags # Include multiple possible tags to improve match rate (including US-GAAP and IFRS tags) financial_metrics = { "total_revenue": ["Revenues", "RevenueFromContractWithCustomerExcludingAssessedTax", "RevenueFromContractWithCustomerIncludingAssessedTax", "SalesRevenueNet", "RevenueFromContractWithCustomer", "Revenue"], "net_income": ["NetIncomeLoss", "ProfitLoss", "NetIncome", "ProfitLossAttributableToOwnersOfParent"], "earnings_per_share": ["EarningsPerShareBasic", "EarningsPerShare", "BasicEarningsPerShare", "BasicEarningsLossPerShare"], "operating_expenses": ["OperatingExpenses", "OperatingCostsAndExpenses", "OperatingExpensesExcludingDepreciationAndAmortization", "CostsAndExpenses", "GeneralAndAdministrativeExpense", "CostOfRevenue", "ResearchAndDevelopmentExpense", "SellingAndMarketingExpense"], "operating_cash_flow": ["NetCashProvidedByUsedInOperatingActivities", "NetCashProvidedUsedInOperatingActivities", "NetCashFlowsFromUsedInOperatingActivities", "CashFlowsFromUsedInOperatingActivities"], } # Store result result = {"period": period} # Determine target form types to search if 'Q' in period: # Quarterly data, mainly search 10-Q (20-F usually doesn't have quarterly reports) target_forms = ["10-Q"] target_forms_annual = ["10-K", "20-F"] # for fallback year = int(period.split('Q')[0]) quarter = period.split('Q')[1] else: # Annual data, search 10-K and 20-F annual forms target_forms = ["10-K", "20-F"] target_forms_annual = target_forms year = int(period) quarter = None # Get company filings to find accession number and primary document filings = self.get_company_filings(cik, form_types=target_forms) filings_map = {} # Map: form -> {accession_number, primary_document, filing_date} # Build filing map for quick lookup for filing in filings: form_type = filing.get("form_type", "") filing_date = filing.get("filing_date", "") accession_number = filing.get("accession_number", "") primary_document = filing.get("primary_document", "") if filing_date and accession_number: # Extract year from filing_date (format: YYYY-MM-DD) file_year = int(filing_date[:4]) if len(filing_date) >= 4 else 0 # Store filing if it matches the period year if file_year == year: key = f"{form_type}_{file_year}" if key not in filings_map: filings_map[key] = { "accession_number": accession_number, "primary_document": primary_document, "form_type": form_type, "filing_date": filing_date } # Iterate through each financial metric for metric_key, metric_tags in financial_metrics.items(): # Support multiple possible tags for metric_tag in metric_tags: # Search both US-GAAP and IFRS tags metric_data = None data_source = None if metric_tag in us_gaap: metric_data = us_gaap[metric_tag] data_source = "us-gaap" elif metric_tag in ifrs_full: metric_data = ifrs_full[metric_tag] data_source = "ifrs-full" if metric_data: units = metric_data.get("units", {}) # Find USD unit data (supports USD and USD/shares) usd_data = None if "USD" in units: usd_data = units["USD"] elif "USD/shares" in units and metric_key == "earnings_per_share": # EPS uses USD/shares unit usd_data = units["USD/shares"] if usd_data: # Try exact match first, then loose match matched_entry = None # Search for data in the specified period for entry in usd_data: form = entry.get("form", "") fy = entry.get("fy", 0) fp = entry.get("fp", "") end_date = entry.get("end", "") if not end_date or len(end_date) < 4: continue entry_year = int(end_date[:4]) # Check if form type matches if form in target_forms: if quarter: # Quarterly data match if entry_year == year and fp == f"Q{quarter}": # If already matched, compare end date, choose the latest if matched_entry: if entry.get("end", "") > matched_entry.get("end", ""): matched_entry = entry else: matched_entry = entry else: # Annual data match - prioritize fiscal year (fy) field # Strategy 1: Exact match by fiscal year if fy == year and (fp == "FY" or fp == "" or not fp): # If already matched, compare end date, choose the latest if matched_entry: if entry.get("end", "") > matched_entry.get("end", ""): matched_entry = entry else: matched_entry = entry # Strategy 2: Match by end date year (when fy not available or doesn't match) elif not matched_entry and entry_year == year and (fp == "FY" or fp == "" or not fp): matched_entry = entry # Strategy 3: Allow fy to differ by 1 year (fiscal year vs calendar year mismatch) elif not matched_entry and fy > 0 and abs(fy - year) <= 1 and (fp == "FY" or fp == "" or not fp): matched_entry = entry # Strategy 4: Match by frame field for 20-F elif not matched_entry and form == "20-F" and "frame" in entry: frame = entry.get("frame", "") if f"CY{year}" in frame or str(year) in end_date: matched_entry = entry # If quarterly data not found, try finding from annual report (fallback strategy) if not matched_entry and quarter and target_forms_annual: for entry in usd_data: form = entry.get("form", "") end_date = entry.get("end", "") fp = entry.get("fp", "") if form in target_forms_annual and end_date: # Check if end date is within this quarter range if str(year) in end_date and f"Q{quarter}" in fp: matched_entry = entry break # Apply matched data if matched_entry: result[metric_key] = matched_entry.get("val", 0) # Get form and accession info form_type = matched_entry.get("form", "") accn_from_facts = matched_entry.get('accn', '').replace('-', '') # Try to get accession_number and primary_document from filings filing_key = f"{form_type}_{year}" filing_info = filings_map.get(filing_key) if filing_info: # Use filing info from get_company_filings accession_number = filing_info["accession_number"].replace('-', '') primary_document = filing_info["primary_document"] # Generate complete source URL if primary_document: result["source_url"] = f"https://www.sec.gov/Archives/edgar/data/{cik}/{accession_number}/{primary_document}" else: result["source_url"] = f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={cik}&type={form_type}&dateb=&owner=exclude&count=100" else: # Fallback to company browse page if filing not found result["source_url"] = f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={cik}&type={form_type}&dateb=&owner=exclude&count=100" result["source_form"] = form_type result["data_source"] = data_source # Add detailed information result[f"{metric_key}_details"] = { "tag": metric_tag, "form": matched_entry.get("form", ""), "fy": matched_entry.get("fy", 0), "fp": matched_entry.get("fp", ""), "val": matched_entry.get("val", 0), "start": matched_entry.get("start", ""), "end": matched_entry.get("end", ""), "accn": matched_entry.get("accn", ""), "filed": matched_entry.get("filed", ""), "frame": matched_entry.get("frame", ""), "data_source": data_source } # If data is found, break out of tag loop if metric_key in result: break return result except Exception as e: print(f"Error getting financial data for period {period}: {e}") return {}