JC321 committed on
Commit
b5415fc
·
verified ·
1 Parent(s): 6a0bd49

Upload 2 files

Browse files
Files changed (2) hide show
  1. edgar_client.py +108 -61
  2. financial_analyzer.py +56 -56
edgar_client.py CHANGED
@@ -62,25 +62,25 @@ class EdgarDataClient:
62
  return None
63
 
64
  except Exception as e:
65
- print(f"搜索公司时出错: {e}")
66
  return None
67
 
68
  def get_company_info(self, cik):
69
  """
70
- 获取公司基本信息
71
 
72
  Args:
73
- cik (str): 公司CIK
74
 
75
  Returns:
76
- dict: 包含公司信息的字典
77
  """
78
  if not self.edgar:
79
- print("sec_edgar_api库未安装")
80
  return None
81
 
82
  try:
83
- # 获取公司提交信息
84
  submissions = self.edgar.get_submissions(cik=cik)
85
 
86
  return {
@@ -91,43 +91,43 @@ class EdgarDataClient:
91
  "sic_description": submissions.get("sicDescription", "")
92
  }
93
  except Exception as e:
94
- print(f"获取公司信息时出错: {e}")
95
  return None
96
 
97
  def get_company_filings(self, cik, form_types=None):
98
  """
99
- 获取公司所有财报文件列表
100
 
101
  Args:
102
- cik (str): 公司CIK
103
- form_types (list): 财报类型列表,如['10-K', '10-Q'],默认为None表示获取所有类型
104
 
105
  Returns:
106
- list: 财报文件列表
107
  """
108
  if not self.edgar:
109
- print("sec_edgar_api库未安装")
110
  return []
111
 
112
  try:
113
- # 获取公司提交信息
114
  submissions = self.edgar.get_submissions(cik=cik)
115
 
116
- # 提取财报信息
117
  filings = []
118
  recent = submissions.get("filings", {}).get("recent", {})
119
 
120
- # 获取各个字段的数据
121
  form_types_list = recent.get("form", [])
122
  filing_dates = recent.get("filingDate", [])
123
  accession_numbers = recent.get("accessionNumber", [])
124
  primary_documents = recent.get("primaryDocument", [])
125
 
126
- # 遍历所有财报
127
  for i in range(len(form_types_list)):
128
  form_type = form_types_list[i]
129
 
130
- # 如果指定了财报类型,则只返回匹配的类型
131
  if form_types and form_type not in form_types:
132
  continue
133
 
@@ -146,58 +146,58 @@ class EdgarDataClient:
146
 
147
  return filings
148
  except Exception as e:
149
- print(f"获取公司财报列表时出错: {e}")
150
  return []
151
 
152
  def get_company_facts(self, cik):
153
  """
154
- 获取公司所有财务事实数据
155
 
156
  Args:
157
- cik (str): 公司CIK
158
 
159
  Returns:
160
- dict: 公司财务事实数据
161
  """
162
  if not self.edgar:
163
- print("sec_edgar_api库未安装")
164
  return {}
165
 
166
  try:
167
  facts = self.edgar.get_company_facts(cik=cik)
168
  return facts
169
  except Exception as e:
170
- print(f"获取公司财务事实时出错: {e}")
171
  return {}
172
 
173
  def get_financial_data_for_period(self, cik, period):
174
  """
175
- 获取指定期间的财务数据(支持年度和季度)
176
 
177
  Args:
178
- cik (str): 公司CIK
179
- period (str): 期间,格式为'YYYY'或'YYYYQX'(如'2025'或'2025Q3')
180
 
181
  Returns:
182
- dict: 财务数据字典
183
  """
184
  if not self.edgar:
185
- print("sec_edgar_api库未安装")
186
  return {}
187
 
188
  try:
189
- # 获取公司财务事实
190
  facts = self.get_company_facts(cik)
191
 
192
  if not facts:
193
  return {}
194
 
195
- # 提取us-gaap和ifrs-full部分的财务数据(20-F可能使用IFRS)
196
  us_gaap = facts.get("facts", {}).get("us-gaap", {})
197
  ifrs_full = facts.get("facts", {}).get("ifrs-full", {})
198
 
199
- # 定义要获取的财务指标及其XBRL标签
200
- # 包含多个可能的标签以提高匹配率(包括US-GAAP和IFRS标签)
201
  financial_metrics = {
202
  "total_revenue": ["Revenues", "RevenueFromContractWithCustomerExcludingAssessedTax", "RevenueFromContractWithCustomerIncludingAssessedTax", "SalesRevenueNet", "RevenueFromContractWithCustomer", "Revenue"],
203
  "net_income": ["NetIncomeLoss", "ProfitLoss", "NetIncome", "ProfitLossAttributableToOwnersOfParent"],
@@ -206,28 +206,54 @@ class EdgarDataClient:
206
  "operating_cash_flow": ["NetCashProvidedByUsedInOperatingActivities", "NetCashProvidedUsedInOperatingActivities", "NetCashFlowsFromUsedInOperatingActivities", "CashFlowsFromUsedInOperatingActivities"],
207
  }
208
 
209
- # 存储结果
210
  result = {"period": period}
211
 
212
- # 确定要查找的表格类型
213
  if 'Q' in period:
214
- # 季度数据,主要查找10-Q(20-F通常没有季度报告)
215
  target_forms = ["10-Q"]
216
- target_forms_annual = ["10-K", "20-F"] # 用于回退查找
217
  year = int(period.split('Q')[0])
218
  quarter = period.split('Q')[1]
219
  else:
220
- # 年度数据,查找10-K和20-F年度表格
221
  target_forms = ["10-K", "20-F"]
222
  target_forms_annual = target_forms
223
  year = int(period)
224
  quarter = None
225
 
226
- # 遍历每个财务指标
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
227
  for metric_key, metric_tags in financial_metrics.items():
228
- # 支持多个可能的标签
229
  for metric_tag in metric_tags:
230
- # 同时查找US-GAAP和IFRS标签
231
  metric_data = None
232
  data_source = None
233
 
@@ -241,19 +267,19 @@ class EdgarDataClient:
241
  if metric_data:
242
  units = metric_data.get("units", {})
243
 
244
- # 查找美元单位的数据(支持USD和USD/shares)
245
  usd_data = None
246
  if "USD" in units:
247
  usd_data = units["USD"]
248
  elif "USD/shares" in units and metric_key == "earnings_per_share":
249
- # EPS使用USD/shares单位
250
  usd_data = units["USD/shares"]
251
 
252
  if usd_data:
253
- # 首先尝试精确匹配,然后尝试宽松匹配
254
  matched_entry = None
255
 
256
- # 查找指定期间的数据
257
  for entry in usd_data:
258
  form = entry.get("form", "")
259
  fy = entry.get("fy", 0)
@@ -265,36 +291,36 @@ class EdgarDataClient:
265
 
266
  entry_year = int(end_date[:4])
267
 
268
- # 检查表格类型是否匹配
269
  if form in target_forms:
270
  if quarter:
271
- # 季度数据匹配
272
  if entry_year == year and fp == f"Q{quarter}":
273
- # 如果已有匹配,比较end date,选择最新的
274
  if matched_entry:
275
  if entry.get("end", "") > matched_entry.get("end", ""):
276
  matched_entry = entry
277
  else:
278
  matched_entry = entry
279
  else:
280
- # 年度数据匹配 - 优先匹配FY字段
281
  if fy == year and (fp == "FY" or fp == "" or not fp):
282
- # 如果已有匹配,比较end date,选择最新的(最近的财年结束日期)
283
  if matched_entry:
284
  if entry.get("end", "") > matched_entry.get("end", ""):
285
  matched_entry = entry
286
  else:
287
  matched_entry = entry
288
- # 备选:匹配end日期的年份(仅当没有FY匹配时)
289
  elif not matched_entry and entry_year == year and (fp == "FY" or fp == "" or not fp):
290
  matched_entry = entry
291
- # 20-F特殊处理:有些20-F没有FY标记,通过frame字段匹配
292
  elif not matched_entry and form == "20-F" and "frame" in entry:
293
  frame = entry.get("frame", "")
294
  if f"CY{year}" in frame or str(year) in end_date:
295
  matched_entry = entry
296
 
297
- # 如果季度数据没找到,尝试从年度报告中查找(回退策略)
298
  if not matched_entry and quarter and target_forms_annual:
299
  for entry in usd_data:
300
  form = entry.get("form", "")
@@ -302,20 +328,41 @@ class EdgarDataClient:
302
  fp = entry.get("fp", "")
303
 
304
  if form in target_forms_annual and end_date:
305
- # 检查结束日期是否在该季度范围内
306
  if str(year) in end_date and f"Q{quarter}" in fp:
307
  matched_entry = entry
308
  break
309
 
310
- # 应用匹配的数据
311
  if matched_entry:
312
  result[metric_key] = matched_entry.get("val", 0)
313
- # 添加数据来源信息
314
- accn = matched_entry.get('accn', '').replace('-', '')
315
- result["source_url"] = f"https://www.sec.gov/Archives/edgar/data/{cik}/{accn}"
316
- result["source_form"] = matched_entry.get("form", "")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  result["data_source"] = data_source
318
- # 添加详细信息
 
319
  result[f"{metric_key}_details"] = {
320
  "tag": metric_tag,
321
  "form": matched_entry.get("form", ""),
@@ -330,13 +377,13 @@ class EdgarDataClient:
330
  "data_source": data_source
331
  }
332
 
333
- # 如果找到了数据,就跳出标签循环
334
  if metric_key in result:
335
  break
336
 
337
  return result
338
  except Exception as e:
339
- print(f"获取{period}期间财务数据时出错: {e}")
340
  return {}
341
 
342
 
 
62
  return None
63
 
64
  except Exception as e:
65
+ print(f"Error searching company: {e}")
66
  return None
67
 
68
  def get_company_info(self, cik):
69
  """
70
+ Get basic company information
71
 
72
  Args:
73
+ cik (str): Company CIK code
74
 
75
  Returns:
76
+ dict: Dictionary containing company information
77
  """
78
  if not self.edgar:
79
+ print("sec_edgar_api library not installed")
80
  return None
81
 
82
  try:
83
+ # Get company submissions
84
  submissions = self.edgar.get_submissions(cik=cik)
85
 
86
  return {
 
91
  "sic_description": submissions.get("sicDescription", "")
92
  }
93
  except Exception as e:
94
+ print(f"Error getting company info: {e}")
95
  return None
96
 
97
  def get_company_filings(self, cik, form_types=None):
98
  """
99
+ Get all company filing documents
100
 
101
  Args:
102
+ cik (str): Company CIK code
103
+ form_types (list): List of form types, e.g., ['10-K', '10-Q'], None for all types
104
 
105
  Returns:
106
+ list: List of filing documents
107
  """
108
  if not self.edgar:
109
+ print("sec_edgar_api library not installed")
110
  return []
111
 
112
  try:
113
+ # Get company submissions
114
  submissions = self.edgar.get_submissions(cik=cik)
115
 
116
+ # Extract filing information
117
  filings = []
118
  recent = submissions.get("filings", {}).get("recent", {})
119
 
120
+ # Get data from each field
121
  form_types_list = recent.get("form", [])
122
  filing_dates = recent.get("filingDate", [])
123
  accession_numbers = recent.get("accessionNumber", [])
124
  primary_documents = recent.get("primaryDocument", [])
125
 
126
+ # Iterate through all filings
127
  for i in range(len(form_types_list)):
128
  form_type = form_types_list[i]
129
 
130
+ # Filter by form type if specified
131
  if form_types and form_type not in form_types:
132
  continue
133
 
 
146
 
147
  return filings
148
  except Exception as e:
149
+ print(f"Error getting company filings: {e}")
150
  return []
151
 
152
  def get_company_facts(self, cik):
153
  """
154
+ Get all company financial facts data
155
 
156
  Args:
157
+ cik (str): Company CIK code
158
 
159
  Returns:
160
+ dict: Company financial facts data
161
  """
162
  if not self.edgar:
163
+ print("sec_edgar_api library not installed")
164
  return {}
165
 
166
  try:
167
  facts = self.edgar.get_company_facts(cik=cik)
168
  return facts
169
  except Exception as e:
170
+ print(f"Error getting company facts: {e}")
171
  return {}
172
 
173
  def get_financial_data_for_period(self, cik, period):
174
  """
175
+ Get financial data for a specific period (supports annual and quarterly)
176
 
177
  Args:
178
+ cik (str): Company CIK code
179
+ period (str): Period in format 'YYYY' or 'YYYYQX' (e.g., '2025' or '2025Q3')
180
 
181
  Returns:
182
+ dict: Financial data dictionary
183
  """
184
  if not self.edgar:
185
+ print("sec_edgar_api library not installed")
186
  return {}
187
 
188
  try:
189
+ # Get company financial facts
190
  facts = self.get_company_facts(cik)
191
 
192
  if not facts:
193
  return {}
194
 
195
+ # Extract us-gaap and ifrs-full financial data (20-F may use IFRS)
196
  us_gaap = facts.get("facts", {}).get("us-gaap", {})
197
  ifrs_full = facts.get("facts", {}).get("ifrs-full", {})
198
 
199
+ # Define financial metrics and their XBRL tags
200
+ # Include multiple possible tags to improve match rate (including US-GAAP and IFRS tags)
201
  financial_metrics = {
202
  "total_revenue": ["Revenues", "RevenueFromContractWithCustomerExcludingAssessedTax", "RevenueFromContractWithCustomerIncludingAssessedTax", "SalesRevenueNet", "RevenueFromContractWithCustomer", "Revenue"],
203
  "net_income": ["NetIncomeLoss", "ProfitLoss", "NetIncome", "ProfitLossAttributableToOwnersOfParent"],
 
206
  "operating_cash_flow": ["NetCashProvidedByUsedInOperatingActivities", "NetCashProvidedUsedInOperatingActivities", "NetCashFlowsFromUsedInOperatingActivities", "CashFlowsFromUsedInOperatingActivities"],
207
  }
208
 
209
+ # Store result
210
  result = {"period": period}
211
 
212
+ # Determine target form types to search
213
  if 'Q' in period:
214
+ # Quarterly data, mainly search 10-Q (20-F usually doesn't have quarterly reports)
215
  target_forms = ["10-Q"]
216
+ target_forms_annual = ["10-K", "20-F"] # for fallback
217
  year = int(period.split('Q')[0])
218
  quarter = period.split('Q')[1]
219
  else:
220
+ # Annual data, search 10-K and 20-F annual forms
221
  target_forms = ["10-K", "20-F"]
222
  target_forms_annual = target_forms
223
  year = int(period)
224
  quarter = None
225
 
226
+ # Get company filings to find accession number and primary document
227
+ filings = self.get_company_filings(cik, form_types=target_forms)
228
+ filings_map = {} # Map: form -> {accession_number, primary_document, filing_date}
229
+
230
+ # Build filing map for quick lookup
231
+ for filing in filings:
232
+ form_type = filing.get("form_type", "")
233
+ filing_date = filing.get("filing_date", "")
234
+ accession_number = filing.get("accession_number", "")
235
+ primary_document = filing.get("primary_document", "")
236
+
237
+ if filing_date and accession_number:
238
+ # Extract year from filing_date (format: YYYY-MM-DD)
239
+ file_year = int(filing_date[:4]) if len(filing_date) >= 4 else 0
240
+
241
+ # Store filing if it matches the period year
242
+ if file_year == year:
243
+ key = f"{form_type}_{file_year}"
244
+ if key not in filings_map:
245
+ filings_map[key] = {
246
+ "accession_number": accession_number,
247
+ "primary_document": primary_document,
248
+ "form_type": form_type,
249
+ "filing_date": filing_date
250
+ }
251
+
252
+ # Iterate through each financial metric
253
  for metric_key, metric_tags in financial_metrics.items():
254
+ # Support multiple possible tags
255
  for metric_tag in metric_tags:
256
+ # Search both US-GAAP and IFRS tags
257
  metric_data = None
258
  data_source = None
259
 
 
267
  if metric_data:
268
  units = metric_data.get("units", {})
269
 
270
+ # Find USD unit data (supports USD and USD/shares)
271
  usd_data = None
272
  if "USD" in units:
273
  usd_data = units["USD"]
274
  elif "USD/shares" in units and metric_key == "earnings_per_share":
275
+ # EPS uses USD/shares unit
276
  usd_data = units["USD/shares"]
277
 
278
  if usd_data:
279
+ # Try exact match first, then loose match
280
  matched_entry = None
281
 
282
+ # Search for data in the specified period
283
  for entry in usd_data:
284
  form = entry.get("form", "")
285
  fy = entry.get("fy", 0)
 
291
 
292
  entry_year = int(end_date[:4])
293
 
294
+ # Check if form type matches
295
  if form in target_forms:
296
  if quarter:
297
+ # Quarterly data match
298
  if entry_year == year and fp == f"Q{quarter}":
299
+ # If already matched, compare end date, choose the latest
300
  if matched_entry:
301
  if entry.get("end", "") > matched_entry.get("end", ""):
302
  matched_entry = entry
303
  else:
304
  matched_entry = entry
305
  else:
306
+ # Annual data match - prioritize FY field match
307
  if fy == year and (fp == "FY" or fp == "" or not fp):
308
+ # If already matched, compare end date, choose the latest (most recent fiscal year end date)
309
  if matched_entry:
310
  if entry.get("end", "") > matched_entry.get("end", ""):
311
  matched_entry = entry
312
  else:
313
  matched_entry = entry
314
+ # Alternative: match end date year (only when no FY match)
315
  elif not matched_entry and entry_year == year and (fp == "FY" or fp == "" or not fp):
316
  matched_entry = entry
317
+ # Special handling for 20-F: some 20-F don't have FY marker, match via frame field
318
  elif not matched_entry and form == "20-F" and "frame" in entry:
319
  frame = entry.get("frame", "")
320
  if f"CY{year}" in frame or str(year) in end_date:
321
  matched_entry = entry
322
 
323
+ # If quarterly data not found, try finding from annual report (fallback strategy)
324
  if not matched_entry and quarter and target_forms_annual:
325
  for entry in usd_data:
326
  form = entry.get("form", "")
 
328
  fp = entry.get("fp", "")
329
 
330
  if form in target_forms_annual and end_date:
331
+ # Check if end date is within this quarter range
332
  if str(year) in end_date and f"Q{quarter}" in fp:
333
  matched_entry = entry
334
  break
335
 
336
+ # Apply matched data
337
  if matched_entry:
338
  result[metric_key] = matched_entry.get("val", 0)
339
+
340
+ # Get form and accession info
341
+ form_type = matched_entry.get("form", "")
342
+ accn_from_facts = matched_entry.get('accn', '').replace('-', '')
343
+
344
+ # Try to get accession_number and primary_document from filings
345
+ filing_key = f"{form_type}_{year}"
346
+ filing_info = filings_map.get(filing_key)
347
+
348
+ if filing_info:
349
+ # Use filing info from get_company_filings
350
+ accession_number = filing_info["accession_number"].replace('-', '')
351
+ primary_document = filing_info["primary_document"]
352
+
353
+ # Generate complete source URL
354
+ if primary_document:
355
+ result["source_url"] = f"https://www.sec.gov/Archives/edgar/data/{cik}/{accession_number}/{primary_document}"
356
+ else:
357
+ result["source_url"] = f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={cik}&type={form_type}&dateb=&owner=exclude&count=100"
358
+ else:
359
+ # Fallback to company browse page if filing not found
360
+ result["source_url"] = f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={cik}&type={form_type}&dateb=&owner=exclude&count=100"
361
+
362
+ result["source_form"] = form_type
363
  result["data_source"] = data_source
364
+
365
+ # Add detailed information
366
  result[f"{metric_key}_details"] = {
367
  "tag": metric_tag,
368
  "form": matched_entry.get("form", ""),
 
377
  "data_source": data_source
378
  }
379
 
380
+ # If data is found, break out of tag loop
381
  if metric_key in result:
382
  break
383
 
384
  return result
385
  except Exception as e:
386
+ print(f"Error getting financial data for period {period}: {e}")
387
  return {}
388
 
389
 
financial_analyzer.py CHANGED
@@ -8,92 +8,92 @@ import json
8
  class FinancialAnalyzer:
9
  def __init__(self, user_agent="Juntao Peng Financial Report Metrics App (jtyxabc@gmail.com)"):
10
  """
11
- 初始化财务分析器
12
 
13
  Args:
14
- user_agent (str): 用户代理字符串,用于识别请求来源
15
  """
16
  self.edgar_client = EdgarDataClient(user_agent)
17
 
18
  def search_company(self, company_input):
19
  """
20
- 搜索公司信息(通过名称或CIK)
21
 
22
  Args:
23
- company_input (str): 公司名称或CIK
24
 
25
  Returns:
26
- dict: 公司信息
27
  """
28
- # 如果输入是数字,假设它是CIK
29
  if company_input.isdigit() and len(company_input) >= 8:
30
- # 获取公司信息
31
  company_info = self.edgar_client.get_company_info(company_input)
32
  if company_info:
33
  return company_info
34
  else:
35
- return {"error": "未找到指定CIK的公司"}
36
  else:
37
- # 通过名称搜索公司
38
  company = self.edgar_client.search_company_by_name(company_input)
39
  if company:
40
- # 获取详细信息
41
  company_info = self.edgar_client.get_company_info(company['cik'])
42
  if company_info:
43
  return company_info
44
  else:
45
- # 如果无法获取详细信息,返回基本信息
46
  return {
47
  "cik": company['cik'],
48
  "name": company['name'],
49
  "tickers": [company['ticker']] if company['ticker'] else []
50
  }
51
  else:
52
- return {"error": "未找到匹配的公司"}
53
 
54
  def get_company_filings_list(self, cik, form_types=['10-K', '10-Q']):
55
  """
56
- 获取公司财报列表
57
 
58
  Args:
59
- cik (str): 公司CIK
60
- form_types (list): 财报类型列表
61
 
62
  Returns:
63
- list: 财报列表
64
  """
65
  filings = self.edgar_client.get_company_filings(cik, form_types)
66
  return filings
67
 
68
  def extract_financial_metrics(self, cik, years=3):
69
  """
70
- 提取指定年数的财务指标
71
 
72
  Args:
73
- cik (str): 公司CIK
74
- years (int): 要提取的年数,默认为3年
75
 
76
  Returns:
77
- list: 财务数据列表
78
  """
79
- # 直接从company facts中获取所有可用的财年数据
80
- # 这样可以避免filing date和fiscal year不匹配的问题
81
  financial_data = []
82
 
83
- # 获取company facts以确定可用的财年
84
  facts = self.edgar_client.get_company_facts(cik)
85
  if not facts:
86
  return []
87
 
88
- # 从facts中提取所有可用的财年
89
  available_years = set()
90
 
91
- # 检查US-GAAP和IFRS数据源
92
  for data_source in ["us-gaap", "ifrs-full"]:
93
  if data_source in facts.get("facts", {}):
94
  source_data = facts["facts"][data_source]
95
 
96
- # 查找Revenue标签以确定可用年份
97
  revenue_tags = ["Revenues", "RevenueFromContractWithCustomerExcludingAssessedTax",
98
  "Revenue", "RevenueFromContractWithCustomer"]
99
 
@@ -102,14 +102,14 @@ class FinancialAnalyzer:
102
  units = source_data[tag].get("units", {})
103
  if "USD" in units:
104
  for entry in units["USD"]:
105
- # 只考虑年度报告(10-K或20-F)
106
  if entry.get("form") in ["10-K", "20-F"]:
107
- # 优先使用fy字段(财政年度)
108
  fy = entry.get("fy", 0)
109
  if fy > 0:
110
  available_years.add(fy)
111
- # 如果没有fy字段,从end date提取年份作为备选
112
- # 注意:对于财年不等于日历年的公司,这可能不准确
113
  elif not fy:
114
  end_date = entry.get("end", "")
115
  if end_date and len(end_date) >= 4:
@@ -120,7 +120,7 @@ class FinancialAnalyzer:
120
  break
121
 
122
  if not available_years:
123
- # 如果无法从facts获取,回退到使用filing date
124
  filings_10k = self.edgar_client.get_company_filings(cik, ['10-K'])
125
  filings_20f = self.edgar_client.get_company_filings(cik, ['20-F'])
126
  filings = filings_10k + filings_20f
@@ -128,7 +128,7 @@ class FinancialAnalyzer:
128
  if not filings:
129
  return []
130
 
131
- # 使用filing date作为参考
132
  latest_filing_year = None
133
  for filing in filings:
134
  if 'filing_date' in filing and filing['filing_date']:
@@ -142,42 +142,42 @@ class FinancialAnalyzer:
142
  if latest_filing_year is None:
143
  return []
144
 
145
- # 生成年份列表
146
- for i in range(years * 2): # 扩大范围以捕获更多数据
147
  available_years.add(latest_filing_year - i)
148
 
149
- # 按年份降序排列
150
  sorted_years = sorted(available_years, reverse=True)
151
 
152
- # 生成期间列表(年度和季度)
153
  periods = []
154
  for year in sorted_years[:years]:
155
- # 添加年度数据
156
  periods.append(str(year))
157
- # 添加季度数据,按Q4、Q3、Q2、Q1顺序
158
  for quarter in range(4, 0, -1):
159
  periods.append(f"{year}Q{quarter}")
160
 
161
- # 获取每个期间的财务数据
162
  for period in periods:
163
  data = self.edgar_client.get_financial_data_for_period(cik, period)
164
- # 即使没有完整数据也添加,避免N/A情况
165
- if data: # 只要period字段存在就添加
166
  financial_data.append(data)
167
 
168
  return financial_data
169
 
170
  def get_latest_financial_data(self, cik):
171
  """
172
- 获取最新财务数据
173
 
174
  Args:
175
- cik (str): 公司CIK
176
 
177
  Returns:
178
- dict: 最新财务数据
179
  """
180
- # 获取最近的财报年份(支持10-K和20-F)
181
  filings_10k = self.edgar_client.get_company_filings(cik, ['10-K'])
182
  filings_20f = self.edgar_client.get_company_filings(cik, ['20-F'])
183
  filings = filings_10k + filings_20f
@@ -185,7 +185,7 @@ class FinancialAnalyzer:
185
  if not filings:
186
  return {}
187
 
188
- # 获取最新的财报年份
189
  latest_filing_year = None
190
  for filing in filings:
191
  if 'filing_date' in filing and filing['filing_date']:
@@ -199,18 +199,18 @@ class FinancialAnalyzer:
199
  if latest_filing_year is None:
200
  return {}
201
 
202
- # 获取最新年份的财务数据
203
  return self.edgar_client.get_financial_data_for_period(cik, str(latest_filing_year))
204
 
205
  def format_financial_data(self, financial_data):
206
  """
207
- 格式化财务数据以便显示
208
 
209
  Args:
210
- financial_data (dict or list): 财务数据
211
 
212
  Returns:
213
- dict or list: 格式化后的财务数据
214
  """
215
  if isinstance(financial_data, list):
216
  formatted_data = []
@@ -222,24 +222,24 @@ class FinancialAnalyzer:
222
 
223
  def _format_single_financial_data(self, data):
224
  """
225
- 格式化单个财务数据条目
226
 
227
  Args:
228
- data (dict): 财务数据
229
 
230
  Returns:
231
- dict: 格式化后的财务数据
232
  """
233
  formatted = data.copy()
234
 
235
- # 确保所有关键字段都存在,即使为None
236
  key_fields = ['total_revenue', 'net_income', 'earnings_per_share', 'operating_expenses', 'operating_cash_flow', 'source_url', 'source_form']
237
  for key in key_fields:
238
  if key not in formatted:
239
  formatted[key] = None
240
 
241
- # 不再进行单位转换,保持原始数值
242
- # 格式化EPS,保留两位小数
243
  if 'earnings_per_share' in formatted and isinstance(formatted['earnings_per_share'], (int, float)):
244
  formatted['earnings_per_share'] = round(formatted['earnings_per_share'], 2)
245
 
 
8
  class FinancialAnalyzer:
9
  def __init__(self, user_agent="Juntao Peng Financial Report Metrics App (jtyxabc@gmail.com)"):
10
  """
11
+ Initialize financial analyzer
12
 
13
  Args:
14
+ user_agent (str): User agent string for identifying request source
15
  """
16
  self.edgar_client = EdgarDataClient(user_agent)
17
 
18
  def search_company(self, company_input):
19
  """
20
+ Search company information (by name or CIK)
21
 
22
  Args:
23
+ company_input (str): Company name or CIK
24
 
25
  Returns:
26
+ dict: Company information
27
  """
28
+ # If input is numeric, assume it's a CIK
29
  if company_input.isdigit() and len(company_input) >= 8:
30
+ # Get company information
31
  company_info = self.edgar_client.get_company_info(company_input)
32
  if company_info:
33
  return company_info
34
  else:
35
+ return {"error": "Company not found for specified CIK"}
36
  else:
37
+ # Search company by name
38
  company = self.edgar_client.search_company_by_name(company_input)
39
  if company:
40
+ # Get detailed information
41
  company_info = self.edgar_client.get_company_info(company['cik'])
42
  if company_info:
43
  return company_info
44
  else:
45
+ # If detailed info unavailable, return basic info
46
  return {
47
  "cik": company['cik'],
48
  "name": company['name'],
49
  "tickers": [company['ticker']] if company['ticker'] else []
50
  }
51
  else:
52
+ return {"error": "No matching company found"}
53
 
54
  def get_company_filings_list(self, cik, form_types=['10-K', '10-Q']):
55
  """
56
+ Get company filings list
57
 
58
  Args:
59
+ cik (str): Company CIK
60
+ form_types (list): List of form types
61
 
62
  Returns:
63
+ list: Filings list
64
  """
65
  filings = self.edgar_client.get_company_filings(cik, form_types)
66
  return filings
67
 
68
  def extract_financial_metrics(self, cik, years=3):
69
  """
70
+ Extract financial metrics for specified number of years
71
 
72
  Args:
73
+ cik (str): Company CIK
74
+ years (int): Number of years to extract, default is 3 years
75
 
76
  Returns:
77
+ list: List of financial data
78
  """
79
+ # Get all available fiscal years directly from company facts
80
+ # This avoids mismatches between filing date and fiscal year
81
  financial_data = []
82
 
83
+ # Get company facts to determine available fiscal years
84
  facts = self.edgar_client.get_company_facts(cik)
85
  if not facts:
86
  return []
87
 
88
+ # Extract all available fiscal years from facts
89
  available_years = set()
90
 
91
+ # Check US-GAAP and IFRS data sources
92
  for data_source in ["us-gaap", "ifrs-full"]:
93
  if data_source in facts.get("facts", {}):
94
  source_data = facts["facts"][data_source]
95
 
96
+ # Look for Revenue tags to determine available years
97
  revenue_tags = ["Revenues", "RevenueFromContractWithCustomerExcludingAssessedTax",
98
  "Revenue", "RevenueFromContractWithCustomer"]
99
 
 
102
  units = source_data[tag].get("units", {})
103
  if "USD" in units:
104
  for entry in units["USD"]:
105
+ # Only consider annual reports (10-K or 20-F)
106
  if entry.get("form") in ["10-K", "20-F"]:
107
+ # Prioritize using fy field (fiscal year)
108
  fy = entry.get("fy", 0)
109
  if fy > 0:
110
  available_years.add(fy)
111
+ # If no fy field, extract year from end date as fallback
112
+ # Note: This may be inaccurate for companies whose fiscal year differs from calendar year
113
  elif not fy:
114
  end_date = entry.get("end", "")
115
  if end_date and len(end_date) >= 4:
 
120
  break
121
 
122
  if not available_years:
123
+ # If unable to get from facts, fall back to using filing date
124
  filings_10k = self.edgar_client.get_company_filings(cik, ['10-K'])
125
  filings_20f = self.edgar_client.get_company_filings(cik, ['20-F'])
126
  filings = filings_10k + filings_20f
 
128
  if not filings:
129
  return []
130
 
131
+ # Use filing date as reference
132
  latest_filing_year = None
133
  for filing in filings:
134
  if 'filing_date' in filing and filing['filing_date']:
 
142
  if latest_filing_year is None:
143
  return []
144
 
145
+ # Generate year list
146
+ for i in range(years * 2): # Expand range to capture more data
147
  available_years.add(latest_filing_year - i)
148
 
149
+ # Sort years in descending order
150
  sorted_years = sorted(available_years, reverse=True)
151
 
152
+ # Generate period list (annual and quarterly)
153
  periods = []
154
  for year in sorted_years[:years]:
155
+ # Add annual data
156
  periods.append(str(year))
157
+ # Add quarterly data in order Q4, Q3, Q2, Q1
158
  for quarter in range(4, 0, -1):
159
  periods.append(f"{year}Q{quarter}")
160
 
161
+ # Get financial data for each period
162
  for period in periods:
163
  data = self.edgar_client.get_financial_data_for_period(cik, period)
164
+ # Add even if incomplete data, to avoid N/A situations
165
+ if data: # Add as long as period field exists
166
  financial_data.append(data)
167
 
168
  return financial_data
169
 
170
  def get_latest_financial_data(self, cik):
171
  """
172
+ Get latest financial data
173
 
174
  Args:
175
+ cik (str): Company CIK
176
 
177
  Returns:
178
+ dict: Latest financial data
179
  """
180
+ # Get latest filing year (supports 10-K and 20-F)
181
  filings_10k = self.edgar_client.get_company_filings(cik, ['10-K'])
182
  filings_20f = self.edgar_client.get_company_filings(cik, ['20-F'])
183
  filings = filings_10k + filings_20f
 
185
  if not filings:
186
  return {}
187
 
188
+ # Get latest filing year
189
  latest_filing_year = None
190
  for filing in filings:
191
  if 'filing_date' in filing and filing['filing_date']:
 
199
  if latest_filing_year is None:
200
  return {}
201
 
202
+ # Get financial data for latest year
203
  return self.edgar_client.get_financial_data_for_period(cik, str(latest_filing_year))
204
 
205
  def format_financial_data(self, financial_data):
206
  """
207
+ Format financial data for display
208
 
209
  Args:
210
+ financial_data (dict or list): Financial data
211
 
212
  Returns:
213
+ dict or list: Formatted financial data
214
  """
215
  if isinstance(financial_data, list):
216
  formatted_data = []
 
222
 
223
  def _format_single_financial_data(self, data):
224
  """
225
+ Format single financial data entry
226
 
227
  Args:
228
+ data (dict): Financial data
229
 
230
  Returns:
231
+ dict: Formatted financial data
232
  """
233
  formatted = data.copy()
234
 
235
+ # Ensure all key fields exist, even if None
236
  key_fields = ['total_revenue', 'net_income', 'earnings_per_share', 'operating_expenses', 'operating_cash_flow', 'source_url', 'source_form']
237
  for key in key_fields:
238
  if key not in formatted:
239
  formatted[key] = None
240
 
241
+ # No longer perform unit conversion, keep original values
242
+ # Format EPS, keep two decimal places
243
  if 'earnings_per_share' in formatted and isinstance(formatted['earnings_per_share'], (int, float)):
244
  formatted['earnings_per_share'] = round(formatted['earnings_per_share'], 2)
245