File size: 19,176 Bytes
98e3256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5415fc
98e3256
 
 
 
b5415fc
98e3256
 
b5415fc
98e3256
 
b5415fc
98e3256
 
b5415fc
98e3256
 
 
b5415fc
98e3256
 
 
 
 
 
 
 
 
 
b5415fc
98e3256
 
 
 
b5415fc
98e3256
 
b5415fc
 
98e3256
 
b5415fc
98e3256
 
b5415fc
98e3256
 
 
b5415fc
98e3256
 
b5415fc
98e3256
 
 
b5415fc
98e3256
 
 
 
 
b5415fc
98e3256
 
 
b5415fc
98e3256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5415fc
98e3256
 
 
 
b5415fc
98e3256
 
b5415fc
98e3256
 
b5415fc
98e3256
 
b5415fc
98e3256
 
 
 
 
 
b5415fc
98e3256
 
 
 
b5415fc
98e3256
 
b5415fc
 
98e3256
 
b5415fc
98e3256
 
b5415fc
98e3256
 
 
b5415fc
98e3256
 
 
 
 
b5415fc
98e3256
 
 
b5415fc
 
98e3256
 
 
 
 
 
 
 
b5415fc
98e3256
 
b5415fc
98e3256
b5415fc
98e3256
b5415fc
98e3256
 
 
b5415fc
98e3256
 
 
 
 
b5415fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98e3256
b5415fc
98e3256
b5415fc
98e3256
 
 
 
 
 
 
 
 
 
 
 
 
b5415fc
98e3256
 
 
 
b5415fc
98e3256
 
 
b5415fc
98e3256
 
b5415fc
98e3256
 
 
 
 
 
 
 
 
 
 
b5415fc
98e3256
 
b5415fc
98e3256
b5415fc
98e3256
 
 
 
 
 
c0bdef3
 
98e3256
c0bdef3
98e3256
 
 
 
 
c0bdef3
98e3256
 
c0bdef3
 
 
 
98e3256
 
 
 
 
b5415fc
98e3256
 
 
 
 
 
 
b5415fc
98e3256
 
 
 
b5415fc
98e3256
 
b5415fc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98e3256
b5415fc
 
98e3256
 
 
 
 
 
 
 
 
 
 
 
 
 
b5415fc
98e3256
 
 
 
 
b5415fc
98e3256
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
"""EDGAR API Client Module"""

import requests
try:
    from sec_edgar_api.EdgarClient import EdgarClient
except ImportError:
    EdgarClient = None
import json
import time


class EdgarDataClient:
    def __init__(self, user_agent="Juntao Peng Financial Report Metrics App (jtyxabc@gmail.com)"):
        """Store the User-Agent and create the underlying EDGAR client if possible.

        Args:
            user_agent (str): Value stored on the instance and passed to
                ``EdgarClient`` when that class is available.
        """
        self.user_agent = user_agent
        # EdgarClient is falsy when the optional import at module level failed;
        # in that case the wrapper is simply left unset.
        self.edgar = EdgarClient(user_agent=user_agent) if EdgarClient else None
        
    def search_company_by_name(self, company_name):
        """Search company CIK by company name"""
        try:
            # Use SEC company ticker database
            url = "https://www.sec.gov/files/company_tickers.json"
            headers = {"User-Agent": self.user_agent}
            
            response = requests.get(url, headers=headers)
            response.raise_for_status()
            
            companies = response.json()
            
            # Search for matching company names
            matches = []
            exact_matches = []
            for _, company in companies.items():
                company_title = company["title"].lower()
                search_name = company_name.lower()
                
                # Exact match
                if search_name == company_title:
                    exact_matches.append({
                        "cik": str(company["cik_str"]).zfill(10),
                        "name": company["title"],
                        "ticker": company["ticker"]
                    })
                # Partial match
                elif search_name in company_title or \
                     search_name in company["ticker"].lower():
                    matches.append({
                        "cik": str(company["cik_str"]).zfill(10),
                        "name": company["title"],
                        "ticker": company["ticker"]
                    })
            
            # Return exact match first, then partial match
            if exact_matches:
                return exact_matches[0]
            elif matches:
                return matches[0]
            else:
                return None
            
        except Exception as e:
            print(f"Error searching company: {e}")
            return None
    
    def get_company_info(self, cik):
        """
        Get basic company information
        
        Args:
            cik (str): Company CIK code
            
        Returns:
            dict: Dictionary containing company information
        """
        if not self.edgar:
            print("sec_edgar_api library not installed")
            return None
            
        try:
            # Get company submissions
            submissions = self.edgar.get_submissions(cik=cik)
            
            return {
                "cik": cik,
                "name": submissions.get("name", ""),
                "tickers": submissions.get("tickers", []),
                "sic": submissions.get("sic", ""),
                "sic_description": submissions.get("sicDescription", "")
            }
        except Exception as e:
            print(f"Error getting company info: {e}")
            return None
    
    def get_company_filings(self, cik, form_types=None):
        """
        Get all company filing documents
        
        Args:
            cik (str): Company CIK code
            form_types (list): List of form types, e.g., ['10-K', '10-Q'], None for all types
            
        Returns:
            list: List of filing documents
        """
        if not self.edgar:
            print("sec_edgar_api library not installed")
            return []
            
        try:
            # Get company submissions
            submissions = self.edgar.get_submissions(cik=cik)
            
            # Extract filing information
            filings = []
            recent = submissions.get("filings", {}).get("recent", {})
            
            # Get data from each field
            form_types_list = recent.get("form", [])
            filing_dates = recent.get("filingDate", [])
            accession_numbers = recent.get("accessionNumber", [])
            primary_documents = recent.get("primaryDocument", [])
            
            # Iterate through all filings
            for i in range(len(form_types_list)):
                form_type = form_types_list[i]
                
                # Filter by form type if specified
                if form_types and form_type not in form_types:
                    continue
                
                filing_date = filing_dates[i] if i < len(filing_dates) else ""
                accession_number = accession_numbers[i] if i < len(accession_numbers) else ""
                primary_document = primary_documents[i] if i < len(primary_documents) else ""
                
                filing = {
                    "form_type": form_type,
                    "filing_date": filing_date,
                    "accession_number": accession_number,
                    "primary_document": primary_document
                }
                
                filings.append(filing)
            
            return filings
        except Exception as e:
            print(f"Error getting company filings: {e}")
            return []
    
    def get_company_facts(self, cik):
        """
        Get all company financial facts data
        
        Args:
            cik (str): Company CIK code
            
        Returns:
            dict: Company financial facts data
        """
        if not self.edgar:
            print("sec_edgar_api library not installed")
            return {}
            
        try:
            facts = self.edgar.get_company_facts(cik=cik)
            return facts
        except Exception as e:
            print(f"Error getting company facts: {e}")
            return {}
    
    def get_financial_data_for_period(self, cik, period):
        """
        Get financial data for a specific period (supports annual and quarterly)
        
        Args:
            cik (str): Company CIK code
            period (str): Period in format 'YYYY' or 'YYYYQX' (e.g., '2025' or '2025Q3')
            
        Returns:
            dict: Financial data dictionary
        """
        if not self.edgar:
            print("sec_edgar_api library not installed")
            return {}
            
        try:
            # Get company financial facts
            facts = self.get_company_facts(cik)
            
            if not facts:
                return {}
            
            # Extract us-gaap and ifrs-full financial data (20-F may use IFRS)
            us_gaap = facts.get("facts", {}).get("us-gaap", {})
            ifrs_full = facts.get("facts", {}).get("ifrs-full", {})
            
            # Define financial metrics and their XBRL tags
            # Include multiple possible tags to improve match rate (including US-GAAP and IFRS tags)
            financial_metrics = {
                "total_revenue": ["Revenues", "RevenueFromContractWithCustomerExcludingAssessedTax", "RevenueFromContractWithCustomerIncludingAssessedTax", "SalesRevenueNet", "RevenueFromContractWithCustomer", "Revenue"],
                "net_income": ["NetIncomeLoss", "ProfitLoss", "NetIncome", "ProfitLossAttributableToOwnersOfParent"],
                "earnings_per_share": ["EarningsPerShareBasic", "EarningsPerShare", "BasicEarningsPerShare", "BasicEarningsLossPerShare"],
                "operating_expenses": ["OperatingExpenses", "OperatingCostsAndExpenses", "OperatingExpensesExcludingDepreciationAndAmortization", "CostsAndExpenses", "GeneralAndAdministrativeExpense", "CostOfRevenue", "ResearchAndDevelopmentExpense", "SellingAndMarketingExpense"],
                "operating_cash_flow": ["NetCashProvidedByUsedInOperatingActivities", "NetCashProvidedUsedInOperatingActivities", "NetCashFlowsFromUsedInOperatingActivities", "CashFlowsFromUsedInOperatingActivities"],
            }
            
            # Store result
            result = {"period": period}
            
            # Determine target form types to search
            if 'Q' in period:
                # Quarterly data, mainly search 10-Q (20-F usually doesn't have quarterly reports)
                target_forms = ["10-Q"]
                target_forms_annual = ["10-K", "20-F"]  # for fallback
                year = int(period.split('Q')[0])
                quarter = period.split('Q')[1]
            else:
                # Annual data, search 10-K and 20-F annual forms
                target_forms = ["10-K", "20-F"]
                target_forms_annual = target_forms
                year = int(period)
                quarter = None
            
            # Get company filings to find accession number and primary document
            filings = self.get_company_filings(cik, form_types=target_forms)
            filings_map = {}  # Map: form -> {accession_number, primary_document, filing_date}
            
            # Build filing map for quick lookup
            for filing in filings:
                form_type = filing.get("form_type", "")
                filing_date = filing.get("filing_date", "")
                accession_number = filing.get("accession_number", "")
                primary_document = filing.get("primary_document", "")
                
                if filing_date and accession_number:
                    # Extract year from filing_date (format: YYYY-MM-DD)
                    file_year = int(filing_date[:4]) if len(filing_date) >= 4 else 0
                    
                    # Store filing if it matches the period year
                    if file_year == year:
                        key = f"{form_type}_{file_year}"
                        if key not in filings_map:
                            filings_map[key] = {
                                "accession_number": accession_number,
                                "primary_document": primary_document,
                                "form_type": form_type,
                                "filing_date": filing_date
                            }
            
            # Iterate through each financial metric
            for metric_key, metric_tags in financial_metrics.items():
                # Support multiple possible tags
                for metric_tag in metric_tags:
                    # Search both US-GAAP and IFRS tags
                    metric_data = None
                    data_source = None
                    
                    if metric_tag in us_gaap:
                        metric_data = us_gaap[metric_tag]
                        data_source = "us-gaap"
                    elif metric_tag in ifrs_full:
                        metric_data = ifrs_full[metric_tag]
                        data_source = "ifrs-full"
                    
                    if metric_data:
                        units = metric_data.get("units", {})
                        
                        # Find USD unit data (supports USD and USD/shares)
                        usd_data = None
                        if "USD" in units:
                            usd_data = units["USD"]
                        elif "USD/shares" in units and metric_key == "earnings_per_share":
                            # EPS uses USD/shares unit
                            usd_data = units["USD/shares"]
                        
                        if usd_data:
                            # Try exact match first, then loose match
                            matched_entry = None
                            
                            # Search for data in the specified period
                            for entry in usd_data:
                                form = entry.get("form", "")
                                fy = entry.get("fy", 0)
                                fp = entry.get("fp", "")
                                end_date = entry.get("end", "")
                                
                                if not end_date or len(end_date) < 4:
                                    continue
                                    
                                entry_year = int(end_date[:4])
                                
                                # Check if form type matches
                                if form in target_forms:
                                    if quarter:
                                        # Quarterly data match
                                        if entry_year == year and fp == f"Q{quarter}":
                                            # If already matched, compare end date, choose the latest
                                            if matched_entry:
                                                if entry.get("end", "") > matched_entry.get("end", ""):
                                                    matched_entry = entry
                                            else:
                                                matched_entry = entry
                                    else:
                                        # Annual data match - prioritize fiscal year (fy) field
                                        # Strategy 1: Exact match by fiscal year
                                        if fy == year and (fp == "FY" or fp == "" or not fp):
                                            # If already matched, compare end date, choose the latest
                                            if matched_entry:
                                                if entry.get("end", "") > matched_entry.get("end", ""):
                                                    matched_entry = entry
                                            else:
                                                matched_entry = entry
                                        # Strategy 2: Match by end date year (when fy not available or doesn't match)
                                        elif not matched_entry and entry_year == year and (fp == "FY" or fp == "" or not fp):
                                            matched_entry = entry
                                        # Strategy 3: Allow fy to differ by 1 year (fiscal year vs calendar year mismatch)
                                        elif not matched_entry and fy > 0 and abs(fy - year) <= 1 and (fp == "FY" or fp == "" or not fp):
                                            matched_entry = entry
                                        # Strategy 4: Match by frame field for 20-F
                                        elif not matched_entry and form == "20-F" and "frame" in entry:
                                            frame = entry.get("frame", "")
                                            if f"CY{year}" in frame or str(year) in end_date:
                                                matched_entry = entry
                            
                            # If quarterly data not found, try finding from annual report (fallback strategy)
                            if not matched_entry and quarter and target_forms_annual:
                                for entry in usd_data:
                                    form = entry.get("form", "")
                                    end_date = entry.get("end", "")
                                    fp = entry.get("fp", "")
                                    
                                    if form in target_forms_annual and end_date:
                                        # Check if end date is within this quarter range
                                        if str(year) in end_date and f"Q{quarter}" in fp:
                                            matched_entry = entry
                                            break
                            
                            # Apply matched data
                            if matched_entry:
                                result[metric_key] = matched_entry.get("val", 0)
                                
                                # Get form and accession info
                                form_type = matched_entry.get("form", "")
                                accn_from_facts = matched_entry.get('accn', '').replace('-', '')
                                
                                # Try to get accession_number and primary_document from filings
                                filing_key = f"{form_type}_{year}"
                                filing_info = filings_map.get(filing_key)
                                
                                if filing_info:
                                    # Use filing info from get_company_filings
                                    accession_number = filing_info["accession_number"].replace('-', '')
                                    primary_document = filing_info["primary_document"]
                                    
                                    # Generate complete source URL
                                    if primary_document:
                                        result["source_url"] = f"https://www.sec.gov/Archives/edgar/data/{cik}/{accession_number}/{primary_document}"
                                    else:
                                        result["source_url"] = f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={cik}&type={form_type}&dateb=&owner=exclude&count=100"
                                else:
                                    # Fallback to company browse page if filing not found
                                    result["source_url"] = f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK={cik}&type={form_type}&dateb=&owner=exclude&count=100"
                                
                                result["source_form"] = form_type
                                result["data_source"] = data_source
                                
                                # Add detailed information
                                result[f"{metric_key}_details"] = {
                                    "tag": metric_tag,
                                    "form": matched_entry.get("form", ""),
                                    "fy": matched_entry.get("fy", 0),
                                    "fp": matched_entry.get("fp", ""),
                                    "val": matched_entry.get("val", 0),
                                    "start": matched_entry.get("start", ""),
                                    "end": matched_entry.get("end", ""),
                                    "accn": matched_entry.get("accn", ""),
                                    "filed": matched_entry.get("filed", ""),
                                    "frame": matched_entry.get("frame", ""),
                                    "data_source": data_source
                                }
                        
                        # If data is found, break out of tag loop
                        if metric_key in result:
                            break
            
            return result
        except Exception as e:
            print(f"Error getting financial data for period {period}: {e}")
            return {}