# EasyReportDataMCP / edgar_client.py
# Repository page header captured with the file: JC321 · "Upload 8 files" · commit 98e3256 (verified) · 15.8 kB
"""EDGAR API Client Module"""
import json
import time

import requests

# sec_edgar_api is an optional dependency: when it cannot be imported the name
# is bound to None so the rest of the module can test for its presence.
try:
    from sec_edgar_api.EdgarClient import EdgarClient
except ImportError:
    EdgarClient = None
class EdgarDataClient:
    """Client for SEC EDGAR company lookup, filing lists and XBRL company facts.

    Company search fetches the SEC ticker file directly over HTTP. All other
    methods delegate to ``sec_edgar_api.EdgarClient`` when that optional
    package was importable; without it they print a notice and return an
    empty value instead of raising.
    """

    # SEC ticker/CIK mapping queried by search_company_by_name.
    COMPANY_TICKERS_URL = "https://www.sec.gov/files/company_tickers.json"

    # Seconds to wait for the ticker file; without a timeout a stalled
    # connection would block the caller indefinitely.
    REQUEST_TIMEOUT_SECONDS = 10

    # Result key -> candidate XBRL tags, tried in order until one yields a
    # value. Tags are looked up in us-gaap first, then ifrs-full.
    _FINANCIAL_METRICS = {
        "total_revenue": ["Revenues", "RevenueFromContractWithCustomerExcludingAssessedTax", "RevenueFromContractWithCustomerIncludingAssessedTax", "SalesRevenueNet", "RevenueFromContractWithCustomer", "Revenue"],
        "net_income": ["NetIncomeLoss", "ProfitLoss", "NetIncome", "ProfitLossAttributableToOwnersOfParent"],
        "earnings_per_share": ["EarningsPerShareBasic", "EarningsPerShare", "BasicEarningsPerShare", "BasicEarningsLossPerShare"],
        "operating_expenses": ["OperatingExpenses", "OperatingCostsAndExpenses", "OperatingExpensesExcludingDepreciationAndAmortization", "CostsAndExpenses", "GeneralAndAdministrativeExpense", "CostOfRevenue", "ResearchAndDevelopmentExpense", "SellingAndMarketingExpense"],
        "operating_cash_flow": ["NetCashProvidedByUsedInOperatingActivities", "NetCashProvidedUsedInOperatingActivities", "NetCashFlowsFromUsedInOperatingActivities", "CashFlowsFromUsedInOperatingActivities"],
    }

    def __init__(self, user_agent="Juntao Peng Financial Report Metrics App (jtyxabc@gmail.com)"):
        """Store the User-Agent and build the underlying EdgarClient if available.

        Args:
            user_agent (str): Value sent as the ``User-Agent`` header and
                passed to ``EdgarClient``.
        """
        self.user_agent = user_agent
        if EdgarClient:
            self.edgar = EdgarClient(user_agent=user_agent)
        else:
            # Optional dependency missing; data methods check for this.
            self.edgar = None

    @staticmethod
    def _company_record(company):
        """Build the public search result dict from one ticker-file entry."""
        return {
            "cik": str(company["cik_str"]).zfill(10),
            "name": company["title"],
            "ticker": company["ticker"],
        }

    def search_company_by_name(self, company_name):
        """Look up a company in the SEC ticker file by name or ticker.

        The comparison is case-insensitive. The first exact title match wins;
        otherwise the first entry whose title or ticker contains the search
        text is returned.

        Args:
            company_name (str): Company name (or ticker fragment) to search.

        Returns:
            dict | None: ``{"cik", "name", "ticker"}`` with the CIK zero-padded
            to 10 digits, or ``None`` when nothing matches, the input is
            empty/not a string, or the request fails.
        """
        # An empty string is a substring of every title, so it would otherwise
        # "match" whichever company happens to be listed first.
        if not isinstance(company_name, str) or not company_name.strip():
            return None

        try:
            response = requests.get(
                self.COMPANY_TICKERS_URL,
                headers={"User-Agent": self.user_agent},
                timeout=self.REQUEST_TIMEOUT_SECONDS,
            )
            response.raise_for_status()
            companies = response.json()

            search_name = company_name.lower()
            first_partial = None
            for company in companies.values():
                title_lower = company["title"].lower()
                if search_name == title_lower:
                    # Exact matches take priority, so stop at the first one.
                    return self._company_record(company)
                if first_partial is None and (
                    search_name in title_lower
                    or search_name in company["ticker"].lower()
                ):
                    first_partial = self._company_record(company)
            return first_partial
        except Exception as e:
            print(f"搜索公司时出错: {e}")
            return None

    def get_company_info(self, cik):
        """Fetch basic company information from the submissions endpoint.

        Args:
            cik (str): Company CIK.

        Returns:
            dict | None: ``cik``, ``name``, ``tickers``, ``sic`` and
            ``sic_description``; ``None`` when the EDGAR library is missing or
            the lookup fails.
        """
        if not self.edgar:
            print("sec_edgar_api库未安装")
            return None

        try:
            submissions = self.edgar.get_submissions(cik=cik)
            return {
                "cik": cik,
                "name": submissions.get("name", ""),
                "tickers": submissions.get("tickers", []),
                "sic": submissions.get("sic", ""),
                "sic_description": submissions.get("sicDescription", ""),
            }
        except Exception as e:
            print(f"获取公司信息时出错: {e}")
            return None

    def get_company_filings(self, cik, form_types=None):
        """List the company's recent filings, optionally filtered by form type.

        Args:
            cik (str): Company CIK.
            form_types (list | str | None): Form types to keep, e.g.
                ``['10-K', '10-Q']``. ``None`` (or empty) keeps every filing.
                A single string is treated as a one-element list.

        Returns:
            list: One dict per filing with ``form_type``, ``filing_date``,
            ``accession_number`` and ``primary_document``. Empty when the
            EDGAR library is missing or the lookup fails.
        """
        if not self.edgar:
            print("sec_edgar_api库未安装")
            return []

        # A bare string would otherwise be matched by substring ("10" in "10-K").
        if isinstance(form_types, str):
            form_types = [form_types]

        def value_at(column, index):
            # The parallel columns may be shorter than the form column.
            return column[index] if index < len(column) else ""

        try:
            submissions = self.edgar.get_submissions(cik=cik)
            recent = submissions.get("filings", {}).get("recent", {})

            # "recent" stores filings column-wise as parallel lists.
            forms = recent.get("form", [])
            filing_dates = recent.get("filingDate", [])
            accession_numbers = recent.get("accessionNumber", [])
            primary_documents = recent.get("primaryDocument", [])

            filings = []
            for index, form_type in enumerate(forms):
                if form_types and form_type not in form_types:
                    continue
                filings.append({
                    "form_type": form_type,
                    "filing_date": value_at(filing_dates, index),
                    "accession_number": value_at(accession_numbers, index),
                    "primary_document": value_at(primary_documents, index),
                })
            return filings
        except Exception as e:
            print(f"获取公司财报列表时出错: {e}")
            return []

    def get_company_facts(self, cik):
        """Fetch all XBRL company facts for a company.

        Args:
            cik (str): Company CIK.

        Returns:
            dict: Whatever the underlying client returns; ``{}`` when the
            EDGAR library is missing or the lookup fails.
        """
        if not self.edgar:
            print("sec_edgar_api库未安装")
            return {}

        try:
            return self.edgar.get_company_facts(cik=cik)
        except Exception as e:
            print(f"获取公司财务事实时出错: {e}")
            return {}

    @staticmethod
    def _parse_period(period):
        """Split a period label into ``(year, quarter, is_quarterly)``.

        Accepts ``'YYYY'`` / ``'YYYYQX'`` in any letter case, and plain
        integers for annual periods. Raises ``ValueError`` for anything whose
        year part is not an integer.
        """
        text = str(period).strip().upper()
        if "Q" in text:
            parts = text.split("Q")
            return int(parts[0]), parts[1], True
        return int(text), None, False

    @staticmethod
    def _later_entry(current, candidate):
        """Return whichever entry has the later ``end`` date (ties keep current)."""
        if not current:
            return candidate
        if (candidate.get("end") or "") > (current.get("end") or ""):
            return candidate
        return current

    @staticmethod
    def _select_entry(entries, year, quarter, target_forms, fallback_forms):
        """Pick the fact entry that best represents the requested period.

        Args:
            entries (list): Fact entries for one tag/unit.
            year (int): Requested year.
            quarter (str | None): Quarter digit(s) for quarterly requests.
            target_forms (list): Form types searched first.
            fallback_forms (list): Annual form types searched when a quarterly
                request found nothing in ``target_forms``.

        Returns:
            dict | None: The chosen entry, or ``None`` when nothing matches.
        """
        quarter_label = f"Q{quarter}" if quarter else None
        best = None

        for entry in entries:
            # Null JSON values arrive as None; normalise so the string tests
            # below cannot raise.
            form = entry.get("form") or ""
            fy = entry.get("fy", 0)
            fp = entry.get("fp") or ""
            end_date = entry.get("end") or ""
            if len(end_date) < 4:
                continue
            try:
                entry_year = int(end_date[:4])
            except ValueError:
                # Skip a malformed date instead of aborting the whole lookup.
                continue
            if form not in target_forms:
                continue

            if quarter_label:
                # Quarterly: end date in the requested year and matching
                # fiscal period; keep the latest end date.
                if entry_year == year and fp == quarter_label:
                    best = EdgarDataClient._later_entry(best, entry)
                continue

            is_annual_fp = fp == "FY" or not fp
            if fy == year and is_annual_fp:
                # Preferred: fiscal-year field matches; keep the latest end
                # date among those entries.
                best = EdgarDataClient._later_entry(best, entry)
            elif not best and entry_year == year and is_annual_fp:
                # Fallback: match on the end-date year, only while nothing
                # has been selected yet.
                best = entry
            elif not best and form == "20-F" and "frame" in entry:
                # 20-F entries without an FY marker: match via the frame field.
                frame = entry.get("frame") or ""
                if f"CY{year}" in frame or str(year) in end_date:
                    best = entry

        if not best and quarter_label and fallback_forms:
            # Quarterly value missing from 10-Q: look for a matching entry in
            # the annual forms and take the first hit.
            for entry in entries:
                form = entry.get("form") or ""
                fp = entry.get("fp") or ""
                end_date = entry.get("end") or ""
                if (
                    form in fallback_forms
                    and end_date
                    and str(year) in end_date
                    and quarter_label in fp
                ):
                    return entry

        return best

    def get_financial_data_for_period(self, cik, period):
        """Extract key financial metrics for one annual or quarterly period.

        For each metric the candidate tags are tried in order and the first
        tag that yields a matching entry is used. Values are read from the
        ``USD`` unit (``USD/shares`` is also accepted for earnings per share).

        Args:
            cik (str): Company CIK.
            period (str | int): ``'YYYY'`` for annual data or ``'YYYYQX'`` for
                quarterly data (e.g. ``'2025'`` or ``'2025Q3'``). Letter case
                is ignored and an integer year is accepted.

        Returns:
            dict: ``period`` plus, for every metric found, ``<metric>`` and
            ``<metric>_details``. ``source_url``, ``source_form`` and
            ``data_source`` describe the last metric that was found. ``{}``
            when the EDGAR library is missing, no facts are available, or an
            error occurs (including an unparseable period).
        """
        if not self.edgar:
            print("sec_edgar_api库未安装")
            return {}

        try:
            year, quarter, is_quarterly = self._parse_period(period)
            if is_quarterly:
                # Quarterly values come from 10-Q; annual forms are only a
                # fallback.
                target_forms = ["10-Q"]
                target_forms_annual = ["10-K", "20-F"]
            else:
                target_forms = ["10-K", "20-F"]
                target_forms_annual = target_forms

            facts = self.get_company_facts(cik)
            if not facts:
                return {}

            # 20-F filers may report under IFRS rather than US-GAAP.
            taxonomies = facts.get("facts", {})
            us_gaap = taxonomies.get("us-gaap", {})
            ifrs_full = taxonomies.get("ifrs-full", {})

            result = {"period": period}

            for metric_key, metric_tags in self._FINANCIAL_METRICS.items():
                for metric_tag in metric_tags:
                    if metric_tag in us_gaap:
                        metric_data = us_gaap[metric_tag]
                        data_source = "us-gaap"
                    elif metric_tag in ifrs_full:
                        metric_data = ifrs_full[metric_tag]
                        data_source = "ifrs-full"
                    else:
                        continue
                    if not metric_data:
                        continue

                    units = metric_data.get("units", {})
                    usd_data = None
                    if "USD" in units:
                        usd_data = units["USD"]
                    elif "USD/shares" in units and metric_key == "earnings_per_share":
                        # EPS is reported per share.
                        usd_data = units["USD/shares"]
                    if not usd_data:
                        continue

                    matched_entry = self._select_entry(
                        usd_data, year, quarter, target_forms, target_forms_annual
                    )
                    if not matched_entry:
                        continue

                    result[metric_key] = matched_entry.get("val", 0)

                    # Provenance of the value; overwritten by each metric found.
                    accn = (matched_entry.get("accn") or "").replace("-", "")
                    result["source_url"] = f"https://www.sec.gov/Archives/edgar/data/{cik}/{accn}"
                    result["source_form"] = matched_entry.get("form", "")
                    result["data_source"] = data_source

                    result[f"{metric_key}_details"] = {
                        "tag": metric_tag,
                        "form": matched_entry.get("form", ""),
                        "fy": matched_entry.get("fy", 0),
                        "fp": matched_entry.get("fp", ""),
                        "val": matched_entry.get("val", 0),
                        "start": matched_entry.get("start", ""),
                        "end": matched_entry.get("end", ""),
                        "accn": matched_entry.get("accn", ""),
                        "filed": matched_entry.get("filed", ""),
                        "frame": matched_entry.get("frame", ""),
                        "data_source": data_source,
                    }
                    # First tag that produced a value wins for this metric.
                    break

            return result
        except Exception as e:
            print(f"获取{period}期间财务数据时出错: {e}")
            return {}