Spaces:
Runtime error
Runtime error
| """EDGAR API Client Module""" | |
| import requests | |
| try: | |
| from sec_edgar_api.EdgarClient import EdgarClient | |
| except ImportError: | |
| EdgarClient = None | |
| import json | |
| import time | |
class EdgarDataClient:
    """Client for SEC EDGAR company lookup, filing lists and XBRL facts.

    Company search goes straight to the SEC ticker file via ``requests``;
    every other call is delegated to ``sec_edgar_api.EdgarClient`` when that
    optional dependency could be imported. All public methods are
    best-effort: on failure they print a message and return an empty value
    (``None``, ``[]`` or ``{}``) instead of raising.
    """

    # Seconds to wait for the SEC ticker download before giving up, so a
    # stalled connection cannot block the caller forever.
    _REQUEST_TIMEOUT = 30

    # Result key -> candidate XBRL tags, tried in order. Several spellings
    # are listed per metric (US-GAAP and IFRS) to raise the hit rate.
    _FINANCIAL_METRICS = {
        "total_revenue": (
            "Revenues",
            "RevenueFromContractWithCustomerExcludingAssessedTax",
            "RevenueFromContractWithCustomerIncludingAssessedTax",
            "SalesRevenueNet",
            "RevenueFromContractWithCustomer",
            "Revenue",
        ),
        "net_income": (
            "NetIncomeLoss",
            "ProfitLoss",
            "NetIncome",
            "ProfitLossAttributableToOwnersOfParent",
        ),
        "earnings_per_share": (
            "EarningsPerShareBasic",
            "EarningsPerShare",
            "BasicEarningsPerShare",
            "BasicEarningsLossPerShare",
        ),
        "operating_expenses": (
            "OperatingExpenses",
            "OperatingCostsAndExpenses",
            "OperatingExpensesExcludingDepreciationAndAmortization",
            "CostsAndExpenses",
            "GeneralAndAdministrativeExpense",
            "CostOfRevenue",
            "ResearchAndDevelopmentExpense",
            "SellingAndMarketingExpense",
        ),
        "operating_cash_flow": (
            "NetCashProvidedByUsedInOperatingActivities",
            "NetCashProvidedUsedInOperatingActivities",
            "NetCashFlowsFromUsedInOperatingActivities",
            "CashFlowsFromUsedInOperatingActivities",
        ),
    }

    def __init__(self, user_agent="Juntao Peng Financial Report Metrics App (jtyxabc@gmail.com)"):
        """Initialize the EDGAR client.

        Args:
            user_agent (str): Value sent as the ``User-Agent`` header and
                passed to ``EdgarClient``.
        """
        self.user_agent = user_agent
        # EdgarClient is None when the optional sec_edgar_api import failed.
        if EdgarClient:
            self.edgar = EdgarClient(user_agent=user_agent)
        else:
            self.edgar = None

    @staticmethod
    def _company_record(company):
        """Build the public result dict for one row of the SEC ticker file."""
        return {
            "cik": str(company["cik_str"]).zfill(10),
            "name": company["title"],
            "ticker": company["ticker"],
        }

    def search_company_by_name(self, company_name):
        """Look up a company in the SEC ticker file by name or ticker.

        Matching is case-insensitive. An exact title match wins; otherwise
        the first company whose title or ticker contains the search text is
        returned.

        Args:
            company_name (str): Company name (or part of it) or ticker.

        Returns:
            dict | None: ``{"cik", "name", "ticker"}`` with the CIK
            zero-padded to 10 digits, or ``None`` when nothing matches, the
            search text is blank, or the request fails.
        """
        search_name = (company_name or "").strip().lower()
        if not search_name:
            # An empty string is a substring of every title, so it would
            # otherwise "match" the first company in the file.
            return None
        try:
            # Use the SEC company ticker database.
            response = requests.get(
                "https://www.sec.gov/files/company_tickers.json",
                headers={"User-Agent": self.user_agent},
                timeout=self._REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            companies = response.json()

            first_partial = None
            for company in companies.values():
                title = company["title"].lower()
                if search_name == title:
                    # Exact matches take priority over any partial match.
                    return self._company_record(company)
                if first_partial is None and (
                    search_name in title
                    or search_name in company["ticker"].lower()
                ):
                    first_partial = self._company_record(company)
            return first_partial
        except Exception as e:
            print(f"搜索公司时出错: {e}")
            return None

    def get_company_info(self, cik):
        """Fetch basic company information.

        Args:
            cik (str): Company CIK code.

        Returns:
            dict | None: Keys ``cik``, ``name``, ``tickers``, ``sic`` and
            ``sic_description``; ``None`` when ``sec_edgar_api`` is not
            available or the lookup fails.
        """
        if not self.edgar:
            print("sec_edgar_api库未安装")
            return None
        try:
            # Fetch the company's submissions record.
            submissions = self.edgar.get_submissions(cik=cik)
            return {
                "cik": cik,
                "name": submissions.get("name", ""),
                "tickers": submissions.get("tickers", []),
                "sic": submissions.get("sic", ""),
                "sic_description": submissions.get("sicDescription", ""),
            }
        except Exception as e:
            print(f"获取公司信息时出错: {e}")
            return None

    @staticmethod
    def _value_at(values, index):
        """Return ``values[index]``, or ``""`` when the column is too short."""
        return values[index] if index < len(values) else ""

    def get_company_filings(self, cik, form_types=None):
        """List the company's recent filings.

        Args:
            cik (str): Company CIK code.
            form_types (list): Form types to keep, e.g. ``['10-K', '10-Q']``.
                ``None`` (the default) keeps every form type.

        Returns:
            list: One dict per filing with keys ``form_type``,
            ``filing_date``, ``accession_number`` and ``primary_document``.
            Empty when ``sec_edgar_api`` is not available or the lookup
            fails.
        """
        if not self.edgar:
            print("sec_edgar_api库未安装")
            return []
        try:
            # Fetch the company's submissions record.
            submissions = self.edgar.get_submissions(cik=cik)
            recent = submissions.get("filings", {}).get("recent", {})

            # The payload is column-oriented: parallel lists, one per field.
            forms = recent.get("form", [])
            filing_dates = recent.get("filingDate", [])
            accession_numbers = recent.get("accessionNumber", [])
            primary_documents = recent.get("primaryDocument", [])

            filings = []
            for index, form_type in enumerate(forms):
                # When a filter is given, keep only the requested form types.
                if form_types and form_type not in form_types:
                    continue
                filings.append({
                    "form_type": form_type,
                    "filing_date": self._value_at(filing_dates, index),
                    "accession_number": self._value_at(accession_numbers, index),
                    "primary_document": self._value_at(primary_documents, index),
                })
            return filings
        except Exception as e:
            print(f"获取公司财报列表时出错: {e}")
            return []

    def get_company_facts(self, cik):
        """Fetch all XBRL financial facts for the company.

        Args:
            cik (str): Company CIK code.

        Returns:
            dict: Whatever ``EdgarClient.get_company_facts`` returns, or
            ``{}`` when ``sec_edgar_api`` is not available or the call fails.
        """
        if not self.edgar:
            print("sec_edgar_api库未安装")
            return {}
        try:
            return self.edgar.get_company_facts(cik=cik)
        except Exception as e:
            print(f"获取公司财务事实时出错: {e}")
            return {}

    @staticmethod
    def _parse_period(period):
        """Split ``'YYYY'`` / ``'YYYYQn'`` into ``(year, quarter)``.

        ``quarter`` is ``None`` for an annual period, otherwise the text
        after the ``Q``. Surrounding whitespace and letter case are ignored.
        Raises ``ValueError`` when the year part is not an integer.
        """
        parts = str(period).strip().upper().split("Q")
        quarter = parts[1] if len(parts) > 1 else None
        return int(parts[0]), quarter

    @staticmethod
    def _select_entry(entries, target_forms, fallback_forms, year, quarter):
        """Pick the fact entry that best represents the requested period.

        Quarterly requests match on the end-date year plus ``fp == 'Qn'``.
        Annual requests prefer ``fy == year`` with a full-year ``fp``, then
        fall back to the end-date year, then (20-F only) to the ``frame``
        field. Among primary matches the latest ``end`` date wins. Returns
        ``None`` when nothing matches.
        """
        quarter_label = f"Q{quarter}"
        matched = None
        for entry in entries:
            end_date = entry.get("end", "")
            if not end_date or len(end_date) < 4:
                continue
            try:
                entry_year = int(end_date[:4])
            except ValueError:
                # One malformed date must not abort the whole extraction.
                continue
            form = entry.get("form", "")
            if form not in target_forms:
                continue
            fp = entry.get("fp", "")

            if quarter:
                if entry_year == year and fp == quarter_label:
                    # Keep the match with the latest end date.
                    if matched is None or end_date > matched.get("end", ""):
                        matched = entry
                continue

            # Annual facts are tagged "FY" or carry no fiscal period at all.
            full_year = fp == "FY" or not fp
            if entry.get("fy", 0) == year and full_year:
                # Preferred: the fiscal-year field matches; latest end wins.
                if matched is None or end_date > matched.get("end", ""):
                    matched = entry
            elif matched is None and entry_year == year and full_year:
                # Fallback: match on the end-date year while nothing is found.
                matched = entry
            elif matched is None and form == "20-F" and "frame" in entry:
                # Some 20-F facts lack an FY marker; match via frame instead.
                frame = entry.get("frame") or ""
                if f"CY{year}" in frame or str(year) in end_date:
                    matched = entry

        # Quarterly value not found: look for it inside the annual reports.
        if matched is None and quarter and fallback_forms:
            for entry in entries:
                end_date = entry.get("end", "")
                if entry.get("form", "") in fallback_forms and end_date:
                    # fp may be null in the payload; treat that as "no period".
                    if str(year) in end_date and quarter_label in (entry.get("fp") or ""):
                        return entry
        return matched

    def _find_metric(self, metric_key, tags, taxonomies, target_forms,
                     fallback_forms, year, quarter):
        """Return ``(tag, data_source, entry)`` for the first tag with data.

        Tags are tried in order, and for each tag every taxonomy is tried in
        order, so a tag with no usable data in one taxonomy can still be
        served by the other. Returns ``None`` when no tag yields a match.
        """
        for tag in tags:
            for data_source, taxonomy in taxonomies:
                concept = taxonomy.get(tag)
                if not concept:
                    continue
                units = concept.get("units", {})
                # Only USD values are used; EPS is reported in USD/shares.
                if "USD" in units:
                    entries = units["USD"]
                elif "USD/shares" in units and metric_key == "earnings_per_share":
                    entries = units["USD/shares"]
                else:
                    entries = None
                if not entries:
                    continue
                entry = self._select_entry(
                    entries, target_forms, fallback_forms, year, quarter
                )
                if entry:
                    return tag, data_source, entry
        return None

    def get_financial_data_for_period(self, cik, period):
        """Extract headline financial metrics for one annual or quarterly period.

        Args:
            cik (str): Company CIK code.
            period (str): ``'YYYY'`` for a fiscal year or ``'YYYYQn'`` for a
                quarter (e.g. ``'2025'`` or ``'2025Q3'``); case-insensitive.

        Returns:
            dict: Always contains ``period`` on success. For each metric
            found it also contains ``<metric>`` (the value) and
            ``<metric>_details``; ``source_url``, ``source_form`` and
            ``data_source`` describe the last metric found. ``{}`` when
            ``sec_edgar_api`` is not available, no facts are returned, or an
            error occurs (including an unparsable ``period``).
        """
        if not self.edgar:
            print("sec_edgar_api库未安装")
            return {}
        try:
            facts = self.get_company_facts(cik)
            if not facts:
                return {}

            # 20-F filers may report under IFRS, so both taxonomies are read.
            all_facts = facts.get("facts", {})
            taxonomies = (
                ("us-gaap", all_facts.get("us-gaap", {})),
                ("ifrs-full", all_facts.get("ifrs-full", {})),
            )

            year, quarter = self._parse_period(period)
            annual_forms = ["10-K", "20-F"]
            # Quarterly data lives in 10-Q (20-F filers usually file none);
            # the annual forms are kept as a fallback source.
            target_forms = ["10-Q"] if quarter is not None else annual_forms

            result = {"period": period}
            for metric_key, tags in self._FINANCIAL_METRICS.items():
                found = self._find_metric(
                    metric_key, tags, taxonomies,
                    target_forms, annual_forms, year, quarter,
                )
                if found is None:
                    continue
                tag, data_source, entry = found

                result[metric_key] = entry.get("val", 0)
                # Provenance of the value (overwritten by each later metric).
                accn = entry.get('accn', '').replace('-', '')
                result["source_url"] = f"https://www.sec.gov/Archives/edgar/data/{cik}/{accn}"
                result["source_form"] = entry.get("form", "")
                result["data_source"] = data_source
                result[f"{metric_key}_details"] = {
                    "tag": tag,
                    "form": entry.get("form", ""),
                    "fy": entry.get("fy", 0),
                    "fp": entry.get("fp", ""),
                    "val": entry.get("val", 0),
                    "start": entry.get("start", ""),
                    "end": entry.get("end", ""),
                    "accn": entry.get("accn", ""),
                    "filed": entry.get("filed", ""),
                    "frame": entry.get("frame", ""),
                    "data_source": data_source,
                }
            return result
        except Exception as e:
            print(f"获取{period}期间财务数据时出错: {e}")
            return {}