Spaces:
Runtime error
Runtime error
File size: 19,176 Bytes
98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 c0bdef3 98e3256 c0bdef3 98e3256 c0bdef3 98e3256 c0bdef3 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 b5415fc 98e3256 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 |
"""EDGAR API Client Module"""
import json
import time

import requests

try:
    from sec_edgar_api.EdgarClient import EdgarClient
except ImportError:
    # sec_edgar_api is optional: the client class checks for None before use.
    EdgarClient = None
class EdgarDataClient:
    """Client for SEC EDGAR company lookups, filings and XBRL financial facts.

    The ticker list is fetched with ``requests``; everything else goes through
    ``EdgarClient`` from the optional ``sec_edgar_api`` package.  Public
    methods never raise: failures are reported with ``print`` and an empty
    value (``None``, ``[]`` or ``{}``) is returned instead.
    """

    # Seconds to wait for the ticker download; without a timeout a stalled
    # connection would block the caller indefinitely.
    REQUEST_TIMEOUT = 30

    COMPANY_TICKERS_URL = "https://www.sec.gov/files/company_tickers.json"

    # Metric name -> candidate XBRL tags (US-GAAP and IFRS), tried in order.
    FINANCIAL_METRICS = {
        "total_revenue": (
            "Revenues",
            "RevenueFromContractWithCustomerExcludingAssessedTax",
            "RevenueFromContractWithCustomerIncludingAssessedTax",
            "SalesRevenueNet",
            "RevenueFromContractWithCustomer",
            "Revenue",
        ),
        "net_income": (
            "NetIncomeLoss",
            "ProfitLoss",
            "NetIncome",
            "ProfitLossAttributableToOwnersOfParent",
        ),
        "earnings_per_share": (
            "EarningsPerShareBasic",
            "EarningsPerShare",
            "BasicEarningsPerShare",
            "BasicEarningsLossPerShare",
        ),
        "operating_expenses": (
            "OperatingExpenses",
            "OperatingCostsAndExpenses",
            "OperatingExpensesExcludingDepreciationAndAmortization",
            "CostsAndExpenses",
            "GeneralAndAdministrativeExpense",
            "CostOfRevenue",
            "ResearchAndDevelopmentExpense",
            "SellingAndMarketingExpense",
        ),
        "operating_cash_flow": (
            "NetCashProvidedByUsedInOperatingActivities",
            "NetCashProvidedUsedInOperatingActivities",
            "NetCashFlowsFromUsedInOperatingActivities",
            "CashFlowsFromUsedInOperatingActivities",
        ),
    }

    def __init__(self, user_agent="Juntao Peng Financial Report Metrics App (jtyxabc@gmail.com)"):
        """Initialize EDGAR client

        Args:
            user_agent (str): Value sent as the User-Agent header / passed to EdgarClient
        """
        self.user_agent = user_agent
        # self.edgar stays None when sec_edgar_api could not be imported
        self.edgar = EdgarClient(user_agent=user_agent) if EdgarClient else None

    def search_company_by_name(self, company_name):
        """
        Search company CIK by company name
        Args:
            company_name (str): Full or partial company name (a ticker fragment also matches)
        Returns:
            dict: {"cik", "name", "ticker"} of the best match (exact title first,
                  otherwise the first partial match), or None when the name is
                  blank, nothing matches, or the download fails
        """
        try:
            search_name = (company_name or "").strip().lower()
            if not search_name:
                # An empty string is a substring of every title, so it would
                # otherwise "match" the first company in the list.
                return None

            response = requests.get(
                self.COMPANY_TICKERS_URL,
                headers={"User-Agent": self.user_agent},
                timeout=self.REQUEST_TIMEOUT,
            )
            response.raise_for_status()
            companies = response.json()

            first_partial = None
            for company in companies.values():
                title_lc = company["title"].lower()
                if search_name == title_lc:
                    # Exact title match always wins, no need to scan further
                    return self._company_record(company)
                if first_partial is None and (
                    search_name in title_lc
                    or search_name in company["ticker"].lower()
                ):
                    first_partial = company

            if first_partial is not None:
                return self._company_record(first_partial)
            return None
        except Exception as e:
            print(f"Error searching company: {e}")
            return None

    @staticmethod
    def _company_record(company):
        """Convert one ticker-list entry into the dict returned by the search."""
        return {
            # CIK is zero-padded to 10 digits
            "cik": str(company["cik_str"]).zfill(10),
            "name": company["title"],
            "ticker": company["ticker"],
        }

    def get_company_info(self, cik):
        """
        Get basic company information
        Args:
            cik (str): Company CIK code
        Returns:
            dict: Dictionary containing company information, or None when the
                  library is missing or the request fails
        """
        if not self.edgar:
            print("sec_edgar_api library not installed")
            return None
        try:
            submissions = self.edgar.get_submissions(cik=cik)
            return {
                "cik": cik,
                "name": submissions.get("name", ""),
                "tickers": submissions.get("tickers", []),
                "sic": submissions.get("sic", ""),
                "sic_description": submissions.get("sicDescription", ""),
            }
        except Exception as e:
            print(f"Error getting company info: {e}")
            return None

    def get_company_filings(self, cik, form_types=None):
        """
        Get all company filing documents
        Args:
            cik (str): Company CIK code
            form_types (list): List of form types, e.g., ['10-K', '10-Q'], None for all types
        Returns:
            list: List of filing documents (dicts with form_type, filing_date,
                  accession_number, primary_document); [] on failure
        """
        if not self.edgar:
            print("sec_edgar_api library not installed")
            return []
        try:
            submissions = self.edgar.get_submissions(cik=cik)
            recent = submissions.get("filings", {}).get("recent", {})

            # "recent" holds parallel lists, one per field
            forms = recent.get("form", [])
            filing_dates = recent.get("filingDate", [])
            accession_numbers = recent.get("accessionNumber", [])
            primary_documents = recent.get("primaryDocument", [])

            def value_at(column, index):
                # A column shorter than the form list yields "" for that row
                return column[index] if index < len(column) else ""

            return [
                {
                    "form_type": form_type,
                    "filing_date": value_at(filing_dates, index),
                    "accession_number": value_at(accession_numbers, index),
                    "primary_document": value_at(primary_documents, index),
                }
                for index, form_type in enumerate(forms)
                if not form_types or form_type in form_types
            ]
        except Exception as e:
            print(f"Error getting company filings: {e}")
            return []

    def get_company_facts(self, cik):
        """
        Get all company financial facts data
        Args:
            cik (str): Company CIK code
        Returns:
            dict: Company financial facts data; {} on failure
        """
        if not self.edgar:
            print("sec_edgar_api library not installed")
            return {}
        try:
            return self.edgar.get_company_facts(cik=cik)
        except Exception as e:
            print(f"Error getting company facts: {e}")
            return {}

    def get_financial_data_for_period(self, cik, period):
        """
        Get financial data for a specific period (supports annual and quarterly)
        Args:
            cik (str): Company CIK code
            period (str): Period in format 'YYYY' or 'YYYYQX' (e.g., '2025' or '2025Q3');
                          case-insensitive, a plain int year is accepted as well
        Returns:
            dict: Financial data dictionary. Always contains "period" on success;
                  for every metric found it contains the value under the metric
                  name and a "<metric>_details" dict. "source_url", "source_form"
                  and "data_source" describe the last metric that was found.
                  {} when the library is missing, no facts exist, or an error occurs.
        """
        if not self.edgar:
            print("sec_edgar_api library not installed")
            return {}
        try:
            facts = self.get_company_facts(cik)
            if not facts:
                return {}

            # 20-F filers may report under IFRS instead of US-GAAP
            taxonomies = facts.get("facts", {})
            us_gaap = taxonomies.get("us-gaap", {})
            ifrs_full = taxonomies.get("ifrs-full", {})

            result = {"period": period}
            year, quarter, target_forms, target_forms_annual = self._parse_period(period)

            # Fetch the fallback (annual) forms too so their filings can be linked
            wanted_forms = target_forms + [
                form for form in target_forms_annual if form not in target_forms
            ]
            filings = self.get_company_filings(cik, form_types=wanted_forms)
            by_accession, by_form_year = self._index_filings(filings, year)

            for metric_key, metric_tags in self.FINANCIAL_METRICS.items():
                for metric_tag in metric_tags:
                    if metric_tag in us_gaap:
                        metric_data, data_source = us_gaap[metric_tag], "us-gaap"
                    elif metric_tag in ifrs_full:
                        metric_data, data_source = ifrs_full[metric_tag], "ifrs-full"
                    else:
                        continue
                    if not metric_data:
                        continue

                    units = metric_data.get("units", {})
                    if "USD" in units:
                        usd_data = units["USD"]
                    elif "USD/shares" in units and metric_key == "earnings_per_share":
                        # EPS uses USD/shares unit
                        usd_data = units["USD/shares"]
                    else:
                        continue
                    if not usd_data:
                        continue

                    matched_entry = self._select_entry(
                        usd_data, year, quarter, target_forms, target_forms_annual
                    )
                    if matched_entry is None:
                        continue

                    result[metric_key] = matched_entry.get("val", 0)
                    form_type = matched_entry.get("form", "")

                    # Prefer the filing the fact actually came from (by accession
                    # number); a 10-K for fiscal year N is usually filed in N+1, so
                    # the form/filing-year lookup is only a fallback.
                    accession_key = (matched_entry.get("accn") or "").replace('-', '')
                    filing_info = (
                        by_accession.get(accession_key)
                        or by_form_year.get(f"{form_type}_{year}")
                    )
                    result["source_url"] = self._build_source_url(cik, form_type, filing_info)
                    result["source_form"] = form_type
                    result["data_source"] = data_source
                    result[f"{metric_key}_details"] = {
                        "tag": metric_tag,
                        "form": matched_entry.get("form", ""),
                        "fy": matched_entry.get("fy", 0),
                        "fp": matched_entry.get("fp", ""),
                        "val": matched_entry.get("val", 0),
                        "start": matched_entry.get("start", ""),
                        "end": matched_entry.get("end", ""),
                        "accn": matched_entry.get("accn", ""),
                        "filed": matched_entry.get("filed", ""),
                        "frame": matched_entry.get("frame", ""),
                        "data_source": data_source,
                    }
                    # First tag that yields data wins for this metric
                    break
            return result
        except Exception as e:
            print(f"Error getting financial data for period {period}: {e}")
            return {}

    @staticmethod
    def _parse_period(period):
        """Split a period into (year, quarter, target_forms, annual_fallback_forms)."""
        text = str(period).strip().upper()
        annual_forms = ["10-K", "20-F"]
        if 'Q' in text:
            # Quarterly data, mainly search 10-Q (20-F usually has no quarterly reports)
            parts = text.split('Q')
            return int(parts[0]), parts[1], ["10-Q"], annual_forms
        return int(text), None, annual_forms, annual_forms

    @staticmethod
    def _index_filings(filings, year):
        """Index filings by dash-less accession number and by "<form>_<filing year>"."""
        by_accession = {}
        by_form_year = {}
        for filing in filings:
            filing_date = filing.get("filing_date", "")
            accession_number = filing.get("accession_number", "")
            if not (filing_date and accession_number):
                continue
            form_type = filing.get("form_type", "")
            info = {
                "accession_number": accession_number,
                "primary_document": filing.get("primary_document", ""),
                "form_type": form_type,
                "filing_date": filing_date,
            }
            by_accession.setdefault(accession_number.replace('-', ''), info)
            # filing_date format: YYYY-MM-DD
            file_year = int(filing_date[:4]) if len(filing_date) >= 4 else 0
            if file_year == year:
                # Keep the first filing seen for each form in the period year
                by_form_year.setdefault(f"{form_type}_{file_year}", info)
        return by_accession, by_form_year

    @staticmethod
    def _select_entry(entries, year, quarter, target_forms, target_forms_annual):
        """Pick the fact entry that best represents the requested period, or None."""
        best_entry = None
        best_rank = None  # lower rank == stricter match
        best_end = ""

        for entry in entries:
            end_date = entry.get("end") or ""
            if len(end_date) < 4 or not end_date[:4].isdigit():
                continue
            form = entry.get("form") or ""
            if form not in target_forms:
                continue

            # These fields may be present with a null value; normalize so the
            # comparisons below cannot raise TypeError.
            fy = entry.get("fy") or 0
            fp = entry.get("fp") or ""
            entry_year = int(end_date[:4])

            if quarter:
                if entry_year == year and fp == f"Q{quarter}":
                    rank = 1
                else:
                    continue
            else:
                full_year = fp in ("FY", "")
                if full_year and fy == year:
                    # Strategy 1: exact fiscal-year match
                    rank = 1
                elif full_year and entry_year == year:
                    # Strategy 2: period end falls in the requested year
                    rank = 2
                elif full_year and fy > 0 and abs(fy - year) <= 1:
                    # Strategy 3: fiscal year off by one (fiscal vs calendar year)
                    rank = 3
                elif form == "20-F" and "frame" in entry:
                    # Strategy 4: frame / end-date match for 20-F
                    frame = entry.get("frame") or ""
                    if f"CY{year}" in frame or str(year) in end_date:
                        rank = 4
                    else:
                        continue
                else:
                    continue

            if (
                best_entry is None
                or rank < best_rank
                # Among strict matches the latest period end wins; among loose
                # matches of equal rank the first one encountered is kept.
                or (rank == 1 and best_rank == 1 and end_date > best_end)
            ):
                best_entry, best_rank, best_end = entry, rank, end_date

        if best_entry is None and quarter and target_forms_annual:
            # Quarterly value not found in 10-Q: fall back to annual forms
            for entry in entries:
                form = entry.get("form") or ""
                end_date = entry.get("end") or ""
                fp = entry.get("fp") or ""
                if form in target_forms_annual and end_date:
                    if str(year) in end_date and f"Q{quarter}" in fp:
                        return entry

        return best_entry

    @staticmethod
    def _build_source_url(cik, form_type, filing_info):
        """Return the filing document URL, or the EDGAR browse page when unknown."""
        if filing_info:
            primary_document = filing_info["primary_document"]
            if primary_document:
                accession_number = filing_info["accession_number"].replace('-', '')
                return f"https://www.sec.gov/Archives/edgar/data/{cik}/{accession_number}/{primary_document}"
        # Fallback to company browse page if the filing or its document is unknown
        return f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={cik}&type={form_type}&dateb=&owner=exclude&count=100"
|