Spaces:
Runtime error
Runtime error
| """EDGAR API Client Module""" | |
| import requests | |
| try: | |
| from sec_edgar_api.EdgarClient import EdgarClient | |
| except ImportError: | |
| EdgarClient = None | |
| import json | |
| import time | |
class EdgarDataClient:
    """Client for SEC EDGAR company lookup, filing lists and XBRL facts.

    Company search downloads the SEC ticker file with ``requests``; every
    other method delegates to ``sec_edgar_api.EdgarClient`` when that
    optional library could be imported. When it could not, ``self.edgar``
    is ``None`` and those methods print a notice and return an empty value.
    """

    # Seconds to wait for sec.gov before giving up, so a stalled connection
    # cannot block the caller forever.
    _REQUEST_TIMEOUT = 30

    # Output key -> candidate XBRL tags, tried in order until one yields data.
    # Includes both US-GAAP and IFRS tag names to improve the match rate.
    _FINANCIAL_METRICS = {
        "total_revenue": (
            "Revenues",
            "RevenueFromContractWithCustomerExcludingAssessedTax",
            "RevenueFromContractWithCustomerIncludingAssessedTax",
            "SalesRevenueNet",
            "RevenueFromContractWithCustomer",
            "Revenue",
        ),
        "net_income": (
            "NetIncomeLoss",
            "ProfitLoss",
            "NetIncome",
            "ProfitLossAttributableToOwnersOfParent",
        ),
        "earnings_per_share": (
            "EarningsPerShareBasic",
            "EarningsPerShare",
            "BasicEarningsPerShare",
            "BasicEarningsLossPerShare",
        ),
        "operating_expenses": (
            "OperatingExpenses",
            "OperatingCostsAndExpenses",
            "OperatingExpensesExcludingDepreciationAndAmortization",
            "CostsAndExpenses",
            "GeneralAndAdministrativeExpense",
            "CostOfRevenue",
            "ResearchAndDevelopmentExpense",
            "SellingAndMarketingExpense",
        ),
        "operating_cash_flow": (
            "NetCashProvidedByUsedInOperatingActivities",
            "NetCashProvidedUsedInOperatingActivities",
            "NetCashFlowsFromUsedInOperatingActivities",
            "CashFlowsFromUsedInOperatingActivities",
        ),
    }

    def __init__(self, user_agent="Juntao Peng Financial Report Metrics App (jtyxabc@gmail.com)"):
        """Initialize EDGAR client

        Args:
            user_agent (str): Value sent as the User-Agent header / passed to
                EdgarClient.
        """
        self.user_agent = user_agent
        if EdgarClient:
            self.edgar = EdgarClient(user_agent=user_agent)
        else:
            # Optional dependency missing; methods check for this and degrade.
            self.edgar = None

    # ------------------------------------------------------------------
    # Company search
    # ------------------------------------------------------------------
    @staticmethod
    def _company_record(company):
        """Convert one ticker-file row into the dict returned to callers."""
        return {
            "cik": str(company["cik_str"]).zfill(10),
            "name": company["title"],
            "ticker": company["ticker"],
        }

    @classmethod
    def _pick_company_match(cls, companies, company_name):
        """Pick the best row for a query: exact name, exact ticker, then partial."""
        search_name = company_name.strip().lower()
        if not search_name:
            # An empty query is a substring of everything; treat it as no match
            # instead of returning whichever company happens to be first.
            return None

        ticker_hit = None
        partial_hit = None
        for company in companies.values():
            title_lc = company["title"].lower()
            ticker_lc = company["ticker"].lower()
            if search_name == title_lc:
                # Nothing can outrank an exact name match, so stop here.
                return cls._company_record(company)
            if search_name == ticker_lc:
                if ticker_hit is None:
                    ticker_hit = company
            elif partial_hit is None and (search_name in title_lc or search_name in ticker_lc):
                partial_hit = company

        best = ticker_hit or partial_hit
        return cls._company_record(best) if best else None

    def search_company_by_name(self, company_name):
        """Search company CIK by company name

        Args:
            company_name (str): Company name or ticker (case-insensitive)
        Returns:
            dict: {"cik", "name", "ticker"} of the best match, or None when
                nothing matches or the lookup fails (the error is printed).
        """
        try:
            # Use SEC company ticker database
            url = "https://www.sec.gov/files/company_tickers.json"
            headers = {"User-Agent": self.user_agent}
            response = requests.get(url, headers=headers, timeout=self._REQUEST_TIMEOUT)
            response.raise_for_status()
            return self._pick_company_match(response.json(), company_name)
        except Exception as e:
            print(f"Error searching company: {e}")
            return None

    # ------------------------------------------------------------------
    # Thin wrappers around sec_edgar_api
    # ------------------------------------------------------------------
    def get_company_info(self, cik):
        """
        Get basic company information
        Args:
            cik (str): Company CIK code
        Returns:
            dict: Dictionary containing company information, or None when the
                library is missing or the request fails
        """
        if not self.edgar:
            print("sec_edgar_api library not installed")
            return None
        try:
            submissions = self.edgar.get_submissions(cik=cik)
            return {
                "cik": cik,
                "name": submissions.get("name", ""),
                "tickers": submissions.get("tickers", []),
                "sic": submissions.get("sic", ""),
                "sic_description": submissions.get("sicDescription", ""),
            }
        except Exception as e:
            print(f"Error getting company info: {e}")
            return None

    def get_company_filings(self, cik, form_types=None):
        """
        Get all company filing documents
        Args:
            cik (str): Company CIK code
            form_types (list): List of form types, e.g., ['10-K', '10-Q'], None for all types
        Returns:
            list: List of filing documents (dicts with form_type, filing_date,
                accession_number, primary_document); [] on any failure
        """
        if not self.edgar:
            print("sec_edgar_api library not installed")
            return []
        try:
            submissions = self.edgar.get_submissions(cik=cik)
            recent = submissions.get("filings", {}).get("recent", {})

            # The submissions payload stores filings column-wise.
            forms = recent.get("form", [])
            filing_dates = recent.get("filingDate", [])
            accession_numbers = recent.get("accessionNumber", [])
            primary_documents = recent.get("primaryDocument", [])

            # Accept a single form name as well as a list of names.
            if isinstance(form_types, str):
                wanted = {form_types}
            else:
                wanted = set(form_types or ())

            def column_value(column, index):
                # Columns may be shorter than the form column; pad with "".
                return column[index] if index < len(column) else ""

            filings = []
            for index, form_type in enumerate(forms):
                if wanted and form_type not in wanted:
                    continue
                filings.append({
                    "form_type": form_type,
                    "filing_date": column_value(filing_dates, index),
                    "accession_number": column_value(accession_numbers, index),
                    "primary_document": column_value(primary_documents, index),
                })
            return filings
        except Exception as e:
            print(f"Error getting company filings: {e}")
            return []

    def get_company_facts(self, cik):
        """
        Get all company financial facts data
        Args:
            cik (str): Company CIK code
        Returns:
            dict: Company financial facts data; {} on any failure
        """
        if not self.edgar:
            print("sec_edgar_api library not installed")
            return {}
        try:
            return self.edgar.get_company_facts(cik=cik)
        except Exception as e:
            print(f"Error getting company facts: {e}")
            return {}

    # ------------------------------------------------------------------
    # Helpers for get_financial_data_for_period
    # ------------------------------------------------------------------
    @staticmethod
    def _year_of(date_text):
        """Return the leading 4-digit year of a date string, or None."""
        head = str(date_text or "")[:4]
        if len(head) == 4 and head.isdecimal():
            return int(head)
        return None

    @staticmethod
    def _parse_period(period):
        """Split 'YYYY' / 'YYYYQX' into (year, quarter-or-None); raise ValueError if malformed."""
        parts = str(period).strip().upper().split("Q")
        year = int(parts[0])
        if len(parts) == 1:
            return year, None
        quarter = parts[1]
        if not quarter:
            # "2025Q" would otherwise be treated as an annual query run
            # against quarterly forms.
            raise ValueError(f"missing quarter number in period {period!r}")
        return year, quarter

    @classmethod
    def _index_filings(cls, filings, year):
        """Index filings by accession number and by '<form>_<filing year>' for the given year."""
        by_accession = {}
        by_form_year = {}
        for filing in filings:
            accession_number = filing.get("accession_number", "")
            if not accession_number:
                continue
            form_type = filing.get("form_type", "")
            filing_date = filing.get("filing_date", "")
            info = {
                "accession_number": accession_number,
                "primary_document": filing.get("primary_document", ""),
                "form_type": form_type,
                "filing_date": filing_date,
            }
            by_accession.setdefault(accession_number, info)
            # First filing wins, matching the order returned by the API.
            if filing_date and cls._year_of(filing_date) == year:
                by_form_year.setdefault(f"{form_type}_{year}", info)
        return by_accession, by_form_year

    @staticmethod
    def _match_rank(entry, entry_year, end_date, year, quarter_label):
        """Rank how well a fact entry fits the period (1 = best), or None if it does not fit."""
        # The API may send explicit nulls; normalise so comparisons cannot raise.
        fp = entry.get("fp") or ""
        if quarter_label:
            return 1 if (entry_year == year and fp == quarter_label) else None

        fy = entry.get("fy")
        if not isinstance(fy, (int, float)):
            fy = 0
        if fp in ("", "FY"):
            # Strategy 1: exact fiscal-year match
            if fy == year:
                return 1
            # Strategy 2: end date falls in the requested year
            if entry_year == year:
                return 2
            # Strategy 3: fiscal year vs calendar year off by one
            if fy > 0 and abs(fy - year) <= 1:
                return 3
        # Strategy 4: 20-F entries identified via the frame field
        if entry.get("form", "") == "20-F" and "frame" in entry:
            frame = entry.get("frame") or ""
            if f"CY{year}" in frame or str(year) in end_date:
                return 4
        return None

    @classmethod
    def _select_entry(cls, entries, year, quarter, target_forms, fallback_forms):
        """Choose the fact entry that best represents the requested period, or None."""
        quarter_label = f"Q{quarter}" if quarter else None
        best = None
        best_rank = None
        for entry in entries:
            end_date = entry.get("end") or ""
            entry_year = cls._year_of(end_date)
            if entry_year is None or entry.get("form", "") not in target_forms:
                continue
            rank = cls._match_rank(entry, entry_year, end_date, year, quarter_label)
            if rank is None:
                continue
            # A better strategy always wins regardless of iteration order;
            # among exact matches the latest period end wins. Looser
            # strategies keep the first entry encountered.
            if (
                best is None
                or rank < best_rank
                or (rank == best_rank == 1 and end_date > (best.get("end") or ""))
            ):
                best, best_rank = entry, rank

        if best is not None or not quarter_label:
            return best

        # Quarterly value not found in 10-Q data: fall back to annual forms
        # that carry an entry tagged with this quarter.
        for entry in entries:
            end_date = entry.get("end") or ""
            fp = entry.get("fp") or ""
            if (
                entry.get("form", "") in fallback_forms
                and end_date
                and str(year) in end_date
                and quarter_label in fp
            ):
                return entry
        return None

    @staticmethod
    def _source_url(cik, form_type, filing_info):
        """Build a link to the filing document, or to the company browse page if unknown."""
        if filing_info and filing_info["primary_document"]:
            accession_number = filing_info["accession_number"].replace('-', '')
            primary_document = filing_info["primary_document"]
            return f"https://www.sec.gov/Archives/edgar/data/{cik}/{accession_number}/{primary_document}"
        return f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={cik}&type={form_type}&dateb=&owner=exclude&count=100"

    # ------------------------------------------------------------------
    # Period extraction
    # ------------------------------------------------------------------
    def get_financial_data_for_period(self, cik, period):
        """
        Get financial data for a specific period (supports annual and quarterly)
        Args:
            cik (str): Company CIK code
            period (str): Period in format 'YYYY' or 'YYYYQX' (e.g., '2025' or '2025Q3')
        Returns:
            dict: Financial data dictionary. Always contains "period"; for each
                metric found it adds "<metric>" and "<metric>_details", plus
                "source_url", "source_form" and "data_source" describing the
                most recently matched metric. {} when the library is missing,
                no facts are available, or any error occurs (error is printed).
        """
        if not self.edgar:
            print("sec_edgar_api library not installed")
            return {}
        try:
            facts = self.get_company_facts(cik)
            if not facts:
                return {}

            # Extract us-gaap and ifrs-full financial data (20-F may use IFRS)
            taxonomies = facts.get("facts", {})
            us_gaap = taxonomies.get("us-gaap", {})
            ifrs_full = taxonomies.get("ifrs-full", {})

            year, quarter = self._parse_period(period)
            if quarter:
                # Quarterly data lives in 10-Q; annual forms are only a fallback.
                target_forms = ["10-Q"]
                fallback_forms = ["10-K", "20-F"]
            else:
                target_forms = ["10-K", "20-F"]
                fallback_forms = target_forms

            filings = self.get_company_filings(cik, form_types=target_forms)
            filings_by_accession, filings_by_form_year = self._index_filings(filings, year)

            result = {"period": period}
            for metric_key, metric_tags in self._FINANCIAL_METRICS.items():
                for metric_tag in metric_tags:
                    # US-GAAP takes precedence over IFRS for the same tag name.
                    if metric_tag in us_gaap:
                        metric_data = us_gaap[metric_tag]
                        data_source = "us-gaap"
                    elif metric_tag in ifrs_full:
                        metric_data = ifrs_full[metric_tag]
                        data_source = "ifrs-full"
                    else:
                        continue
                    if not metric_data:
                        continue

                    units = metric_data.get("units", {})
                    if "USD" in units:
                        usd_data = units["USD"]
                    elif "USD/shares" in units and metric_key == "earnings_per_share":
                        # EPS uses USD/shares unit
                        usd_data = units["USD/shares"]
                    else:
                        continue
                    if not usd_data:
                        continue

                    matched_entry = self._select_entry(
                        usd_data, year, quarter, target_forms, fallback_forms
                    )
                    if not matched_entry:
                        continue

                    result[metric_key] = matched_entry.get("val", 0)
                    form_type = matched_entry.get("form", "")

                    # The fact's accession number identifies the exact filing
                    # it came from; the form/filing-year key is only a
                    # fallback because a filing's year often differs from the
                    # fiscal period it reports.
                    filing_info = (
                        filings_by_accession.get(matched_entry.get("accn") or "")
                        or filings_by_form_year.get(f"{form_type}_{year}")
                    )
                    result["source_url"] = self._source_url(cik, form_type, filing_info)
                    result["source_form"] = form_type
                    result["data_source"] = data_source

                    # Add detailed information
                    result[f"{metric_key}_details"] = {
                        "tag": metric_tag,
                        "form": matched_entry.get("form", ""),
                        "fy": matched_entry.get("fy", 0),
                        "fp": matched_entry.get("fp", ""),
                        "val": matched_entry.get("val", 0),
                        "start": matched_entry.get("start", ""),
                        "end": matched_entry.get("end", ""),
                        "accn": matched_entry.get("accn", ""),
                        "filed": matched_entry.get("filed", ""),
                        "frame": matched_entry.get("frame", ""),
                        "data_source": data_source,
                    }
                    # First tag that yields data wins for this metric.
                    break
            return result
        except Exception as e:
            print(f"Error getting financial data for period {period}: {e}")
            return {}