File size: 11,699 Bytes
1b6b94f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
"""SEC Frames Utilities."""

# pylint: disable=line-too-long

import asyncio
from datetime import datetime
from typing import Dict, List, Optional, Union
from warnings import warn

from aiohttp_client_cache import SQLiteBackend
from aiohttp_client_cache.session import CachedSession
from openbb_core.app.model.abstract.error import OpenBBError
from openbb_core.app.utils import get_user_cache_directory
from openbb_core.provider.utils.errors import EmptyDataError
from openbb_core.provider.utils.helpers import amake_request
from openbb_sec.utils.definitions import (
    FISCAL_PERIODS,
    FISCAL_PERIODS_DICT,
    HEADERS,
    SHARES_FACTS,
    TAXONOMIES,
    USD_PER_SHARE_FACTS,
)
from openbb_sec.utils.helpers import get_all_companies, symbol_map
from pandas import DataFrame


async def fetch_data(url, use_cache, persist) -> Union[Dict, List[Dict]]:
    """Fetch the data from the constructed URL."""
    response: Union[Dict, List[Dict]] = {}
    if use_cache is True:
        cache_dir = f"{get_user_cache_directory()}/http/sec_frames"
        async with CachedSession(
            cache=(
                SQLiteBackend(cache_dir, expire_after=3600 * 24)
                if persist is False
                else SQLiteBackend(cache_dir)
            )
        ) as session:
            try:
                response = await amake_request(url, headers=HEADERS, session=session)  # type: ignore
            finally:
                await session.close()
    else:
        response = await amake_request(url, headers=HEADERS)  # type: ignore
    return response


async def get_frame(  # pylint: disable =too-many-arguments,too-many-locals, too-many-statements
    fact: str = "Revenues",
    year: Optional[int] = None,
    fiscal_period: Optional[FISCAL_PERIODS] = None,
    taxonomy: Optional[TAXONOMIES] = "us-gaap",
    units: Optional[str] = "USD",
    instantaneous: bool = False,
    use_cache: bool = True,
) -> Dict:
    """Get a frame of data for a given fact.

    Source: https://www.sec.gov/edgar/sec-api-documentation

    The xbrl/frames API aggregates one fact for each reporting entity
    that is last filed that most closely fits the calendrical period requested.

    This API supports for annual, quarterly and instantaneous data:

    https://data.sec.gov/api/xbrl/frames/us-gaap/AccountsPayableCurrent/USD/CY2019Q1I.json

    Where the units of measure specified in the XBRL contains a numerator and a denominator,
    these are separated by “-per-” such as “USD-per-shares”. Note that the default unit in XBRL is “pure”.

    The period format is CY#### for annual data (duration 365 days +/- 30 days),
    CY####Q# for quarterly data (duration 91 days +/- 30 days).

    Because company financial calendars can start and end on any month or day and even change in length from quarter to
    quarter according to the day of the week, the frame data is assembled by the dates that best align with a calendar
    quarter or year. Data users should be mindful different reporting start and end dates for facts contained in a frame.

    Parameters
    ----------
    fact : str
        The fact to retrieve. This should be a valid fact from the SEC taxonomy, in UpperCamelCase.
        Defaults to "Revenues".
        AAPL, MSFT, GOOG, BRK-A all report revenue as, "RevenueFromContractWithCustomerExcludingAssessedTax".
        In previous years, they may have reported as "Revenues".
    year : int, optional
        The year to retrieve the data for. If not provided, the current year is used.
    fiscal_period: Literal["fy", "q1", "q2", "q3", "q4"], optional
        The fiscal period to retrieve the data for. If not provided, the most recent quarter is used.
    taxonomy : Literal["us-gaap", "dei", "ifrs-full", "srt"], optional
        The taxonomy to use. Defaults to "us-gaap".
    units : str, optional
        The units to use. Defaults to "USD". This should be a valid unit from the SEC taxonomy, see the notes above.
        The most common units are "USD", "shares", and "USD-per-shares". EPS and outstanding shares facts will
        automatically set.
    instantaneous: bool
        Whether to retrieve instantaneous data. See the notes above for more information. Defaults to False.
        Some facts are only available as instantaneous data.
        The function will automatically attempt to retrieve the data if the initial fiscal quarter request fails.
    use_cache: bool
        Whether to use cache for the request. Defaults to True.

    Returns
    -------
    Dict:
        Nested dictionary with keys, "metadata" and "data".
        The "metadata" key contains information about the frame.
    """
    current_date = datetime.now().date()
    quarter = FISCAL_PERIODS_DICT.get(fiscal_period) if fiscal_period else None
    if year is None and quarter is None:
        quarter = (current_date.month - 1) // 3
        year = current_date.year

    if year is None:
        year = current_date.year

    persist = current_date.year == year

    if fact in SHARES_FACTS:
        units = "shares"

    if fact in USD_PER_SHARE_FACTS:
        units = "USD-per-shares"

    url = f"https://data.sec.gov/api/xbrl/frames/{taxonomy}/{fact}/{units}/CY{year}"

    if quarter:
        url = url + f"Q{quarter}"

    if instantaneous:
        url = url + "I"

    url = url + ".json"
    response: Union[Dict, List[Dict]] = {}
    try:
        response = await fetch_data(url, use_cache, persist)
    except Exception as e:  # pylint: disable=W0718
        message = (
            "No frame was found with the combination of parameters supplied."
            + " Try adjusting the period."
            + " Not all GAAP measures have frames available."
        )
        if url.endswith("I.json"):
            warn("No instantaneous frame was found, trying calendar period data.")
            url = url.replace("I.json", ".json")
            try:
                response = await fetch_data(url, use_cache, persist)
            except Exception:
                raise OpenBBError(message) from e
        elif "Q" in url and not url.endswith("I.json"):
            warn(
                "No frame was found for the requested quarter, trying instantaneous data."
            )
            url = url.replace(".json", "I.json")
            try:
                response = await fetch_data(url, use_cache, persist)
            except Exception:
                raise OpenBBError(message) from e
        else:
            raise OpenBBError(message) from e

    data = sorted(response.get("data", {}), key=lambda x: x["val"], reverse=True)  # type: ignore
    metadata = {
        "frame": response.get("ccp", ""),  # type: ignore
        "tag": response.get("tag", ""),  # type: ignore
        "label": response.get("label", ""),  # type: ignore
        "description": response.get("description", ""),  # type: ignore
        "taxonomy": response.get("taxonomy", ""),  # type: ignore
        "unit": response.get("uom", ""),  # type: ignore
        "count": response.get("pts", ""),  # type: ignore
    }
    df = DataFrame(data)
    companies = await get_all_companies(use_cache=use_cache)
    cik_to_symbol = companies.set_index("cik")["symbol"].to_dict()
    df["symbol"] = df["cik"].astype(str).map(cik_to_symbol)
    df["unit"] = metadata.get("unit")
    df["fact"] = metadata.get("label")
    df["frame"] = metadata.get("frame")
    df = df.fillna("N/A").replace("N/A", None)
    results = {"metadata": metadata, "data": df.to_dict("records")}

    return results


async def get_concept(
    symbol: str,
    fact: str = "Revenues",
    year: Optional[int] = None,
    taxonomy: Optional[TAXONOMIES] = "us-gaap",
    use_cache: bool = True,
) -> Dict:
    """Return all the XBRL disclosures from a single company (CIK) Concept (a taxonomy and tag) into a single JSON file.

    Each entry contains a separate array of facts for each units of measure that the company has chosen to disclose
    (e.g. net profits reported in U.S. dollars and in Canadian dollars).

    Parameters
    ----------
    symbol: str
        The ticker symbol to look up.
    fact : str
        The fact to retrieve. This should be a valid fact from the SEC taxonomy, in UpperCamelCase.
        Defaults to "Revenues".
        AAPL, MSFT, GOOG, BRK-A all report revenue as, "RevenueFromContractWithCustomerExcludingAssessedTax".
        In previous years, they may have reported as "Revenues".
    year : int, optional
        The year to retrieve the data for. If not provided, all reported values will be returned.
    taxonomy : Literal["us-gaap", "dei", "ifrs-full", "srt"], optional
        The taxonomy to use. Defaults to "us-gaap".
    use_cache: bool
        Whether to use cache for the request. Defaults to True.

    Returns
    -------
    Dict:
        Nested dictionary with keys, "metadata" and "data".
        The "metadata" key contains information about the company concept.
    """
    symbols = symbol.split(",")
    results: List[Dict] = []
    messages: List = []
    metadata: Dict = {}

    async def get_one(ticker):
        """Get data for one symbol."""
        ticker = ticker.upper()
        message = f"Symbol Error: No data was found for, {ticker} and {fact}"
        cik = await symbol_map(ticker)
        if cik == "":
            message = f"Symbol Error: No CIK was found for, {ticker}"
            warn(message)
            messages.append(message)
        else:
            url = f"https://data.sec.gov/api/xbrl/companyconcept/CIK{cik}/{taxonomy}/{fact}.json"
            response: Union[Dict, List[Dict]] = {}
            try:
                response = await fetch_data(url, use_cache, False)
            except Exception as _:  # pylint: disable=W0718
                warn(message)
                messages.append(message)
            if response:
                units = response.get("units", {})  # type: ignore
                metadata[ticker] = {
                    "cik": response.get("cik", ""),  # type: ignore
                    "taxonomy": response.get("taxonomy", ""),  # type: ignore
                    "tag": response.get("tag", ""),  # type: ignore
                    "label": response.get("label", ""),  # type: ignore
                    "description": response.get("description", ""),  # type: ignore
                    "name": response.get("entityName", ""),  # type: ignore
                    "units": (
                        list(units) if units and len(units) > 1 else list(units)[0]
                    ),
                }
                for k, v in units.items():
                    unit = k
                    values = v
                    for item in values:
                        item["unit"] = unit
                        item["symbol"] = ticker
                        item["cik"] = metadata[ticker]["cik"]
                        item["name"] = metadata[ticker]["name"]
                        item["fact"] = metadata[ticker]["label"]
                    results.extend(values)

    await asyncio.gather(*[get_one(ticker) for ticker in symbols])

    if not results:
        raise EmptyDataError(f"{messages}")

    if year is not None:
        filtered_results = [d for d in results if str(year) == str(d.get("fy"))]
        if len(filtered_results) > 0:
            results = filtered_results
        if len(filtered_results) == 0:
            warn(
                f"No results were found for {fact} in the year, {year}."
                " Returning all entries instead. Concept and fact names may differ by company and year."
            )

    return {
        "metadata": metadata,
        "data": sorted(results, key=lambda x: (x["filed"], x["end"]), reverse=True),
    }