CatPtain's picture
Upload 131 files
1b6b94f verified
"""SEC Frames Utilities."""
# pylint: disable=line-too-long
import asyncio
from datetime import datetime
from typing import Dict, List, Optional, Union
from warnings import warn
from aiohttp_client_cache import SQLiteBackend
from aiohttp_client_cache.session import CachedSession
from openbb_core.app.model.abstract.error import OpenBBError
from openbb_core.app.utils import get_user_cache_directory
from openbb_core.provider.utils.errors import EmptyDataError
from openbb_core.provider.utils.helpers import amake_request
from openbb_sec.utils.definitions import (
FISCAL_PERIODS,
FISCAL_PERIODS_DICT,
HEADERS,
SHARES_FACTS,
TAXONOMIES,
USD_PER_SHARE_FACTS,
)
from openbb_sec.utils.helpers import get_all_companies, symbol_map
from pandas import DataFrame
async def fetch_data(url, use_cache, persist) -> Union[Dict, List[Dict]]:
    """Request the URL with the SEC headers, optionally through an SQLite-backed HTTP cache.

    Parameters
    ----------
    url : str
        The fully constructed URL to request.
    use_cache : bool
        Only the literal True routes the request through a cached session;
        any other value performs a plain request.
    persist : bool
        Only the literal False gives the cache backend a 24-hour `expire_after`;
        any other value builds the backend without an explicit expiry.

    Returns
    -------
    Union[Dict, List[Dict]]
        Whatever `amake_request` returns for the URL.
    """
    # Uncached path: a single direct request, no session management needed.
    if use_cache is not True:
        return await amake_request(url, headers=HEADERS)  # type: ignore

    cache_path = f"{get_user_cache_directory()}/http/sec_frames"
    if persist is False:
        backend = SQLiteBackend(cache_path, expire_after=3600 * 24)
    else:
        backend = SQLiteBackend(cache_path)

    async with CachedSession(cache=backend) as session:
        try:
            return await amake_request(url, headers=HEADERS, session=session)  # type: ignore
        finally:
            # Explicit close is kept in addition to the context manager exit.
            await session.close()
async def get_frame(  # pylint: disable =too-many-arguments,too-many-locals, too-many-statements
    fact: str = "Revenues",
    year: Optional[int] = None,
    fiscal_period: Optional[FISCAL_PERIODS] = None,
    taxonomy: Optional[TAXONOMIES] = "us-gaap",
    units: Optional[str] = "USD",
    instantaneous: bool = False,
    use_cache: bool = True,
) -> Dict:
    """Get a frame of data for a given fact.

    Source: https://www.sec.gov/edgar/sec-api-documentation

    The xbrl/frames API aggregates one fact for each reporting entity
    that is last filed that most closely fits the calendrical period requested.

    This API supports annual, quarterly and instantaneous data:

    https://data.sec.gov/api/xbrl/frames/us-gaap/AccountsPayableCurrent/USD/CY2019Q1I.json

    Where the units of measure specified in the XBRL contains a numerator and a denominator,
    these are separated by “-per-” such as “USD-per-shares”. Note that the default unit in XBRL is “pure”.

    The period format is CY#### for annual data (duration 365 days +/- 30 days),
    CY####Q# for quarterly data (duration 91 days +/- 30 days).

    Because company financial calendars can start and end on any month or day and even change in length from quarter to
    quarter according to the day of the week, the frame data is assembled by the dates that best align with a calendar
    quarter or year. Data users should be mindful of different reporting start and end dates for facts contained in a frame.

    Parameters
    ----------
    fact : str
        The fact to retrieve. This should be a valid fact from the SEC taxonomy, in UpperCamelCase.
        Defaults to "Revenues".
        AAPL, MSFT, GOOG, BRK-A all report revenue as, "RevenueFromContractWithCustomerExcludingAssessedTax".
        In previous years, they may have reported as "Revenues".
    year : int, optional
        The year to retrieve the data for. If not provided, the current year is used, except when
        neither `year` nor a quarter is supplied during January-March, where the previous year's
        fourth quarter is requested.
    fiscal_period: Literal["fy", "q1", "q2", "q3", "q4"], optional
        The fiscal period to retrieve the data for. If neither a year nor a quarter is supplied,
        the most recently completed calendar quarter is used. If only `year` is supplied,
        the annual frame is requested.
    taxonomy : Literal["us-gaap", "dei", "ifrs-full", "srt"], optional
        The taxonomy to use. Defaults to "us-gaap".
    units : str, optional
        The units to use. Defaults to "USD". This should be a valid unit from the SEC taxonomy, see the notes above.
        The most common units are "USD", "shares", and "USD-per-shares". EPS and outstanding shares facts will
        automatically set.
    instantaneous: bool
        Whether to retrieve instantaneous data. See the notes above for more information. Defaults to False.
        Some facts are only available as instantaneous data.
        The function will automatically attempt to retrieve the data if the initial fiscal quarter request fails.
    use_cache: bool
        Whether to use cache for the request. Defaults to True.

    Returns
    -------
    Dict:
        Nested dictionary with keys, "metadata" and "data".
        The "metadata" key contains information about the frame.
        The "data" key is a list of records sorted by "val" in descending order,
        each extended with "symbol", "unit", "fact" and "frame".

    Raises
    ------
    OpenBBError
        If neither the requested frame nor its fallback (instantaneous <-> calendar period) can be fetched.
    EmptyDataError
        If the frame was fetched but contains no data points.
    """
    current_date = datetime.now().date()
    quarter = FISCAL_PERIODS_DICT.get(fiscal_period) if fiscal_period else None

    if year is None and quarter is None:
        # Default to the most recently completed calendar quarter.
        quarter = (current_date.month - 1) // 3
        year = current_date.year
        if quarter == 0:
            # January-March: no quarter of the current year has completed yet.
            # Without this rollback `quarter` is 0 (falsy) and an annual frame
            # for the current year would be requested instead.
            quarter = 4
            year -= 1

    if year is None:
        year = current_date.year

    # NOTE(review): `persist` is True only for the current year, and fetch_data builds a
    # cache without expiry in that case while other years expire after 24 hours.
    # This looks inverted (past years are the stable ones) - confirm the intent.
    persist = current_date.year == year

    # Share-count and per-share facts are only reported in their own units.
    if fact in SHARES_FACTS:
        units = "shares"

    if fact in USD_PER_SHARE_FACTS:
        units = "USD-per-shares"

    period_url = (
        f"https://data.sec.gov/api/xbrl/frames/{taxonomy}/{fact}/{units}/CY{year}"
    )

    if quarter:
        period_url = period_url + f"Q{quarter}"

    url = period_url + ("I.json" if instantaneous else ".json")
    response: Union[Dict, List[Dict]] = {}

    try:
        response = await fetch_data(url, use_cache, persist)
    except Exception as e:  # pylint: disable=W0718
        message = (
            "No frame was found with the combination of parameters supplied."
            + " Try adjusting the period."
            + " Not all GAAP measures have frames available."
        )
        # Pick the opposite flavour of the same period as a single fallback attempt.
        # The decision is keyed on the resolved period rather than on substrings
        # of the URL, which may also match characters of the fact name.
        if instantaneous:
            warn("No instantaneous frame was found, trying calendar period data.")
            url = period_url + ".json"
        elif quarter:
            warn(
                "No frame was found for the requested quarter, trying instantaneous data."
            )
            url = period_url + "I.json"
        else:
            raise OpenBBError(message) from e

        try:
            response = await fetch_data(url, use_cache, persist)
        except Exception:  # pylint: disable=W0718
            raise OpenBBError(message) from e

    # An empty payload would otherwise surface as a KeyError on the "cik" column below.
    if not isinstance(response, dict) or not response.get("data"):
        raise EmptyDataError(
            f"The frame for {fact} was returned with no data: {url}"
        )

    data = sorted(response["data"], key=lambda x: x["val"], reverse=True)
    metadata = {
        "frame": response.get("ccp", ""),
        "tag": response.get("tag", ""),
        "label": response.get("label", ""),
        "description": response.get("description", ""),
        "taxonomy": response.get("taxonomy", ""),
        "unit": response.get("uom", ""),
        "count": response.get("pts", ""),
    }
    df = DataFrame(data)

    # Map each reporting entity's CIK to a ticker symbol; unmatched CIKs become missing values.
    companies = await get_all_companies(use_cache=use_cache)
    cik_to_symbol = companies.set_index("cik")["symbol"].to_dict()
    df["symbol"] = df["cik"].astype(str).map(cik_to_symbol)
    df["unit"] = metadata.get("unit")
    df["fact"] = metadata.get("label")
    df["frame"] = metadata.get("frame")

    # Convert missing values to None for the records output.
    df = df.fillna("N/A").replace("N/A", None)
    results = {"metadata": metadata, "data": df.to_dict("records")}

    return results
async def get_concept(
    symbol: str,
    fact: str = "Revenues",
    year: Optional[int] = None,
    taxonomy: Optional[TAXONOMIES] = "us-gaap",
    use_cache: bool = True,
) -> Dict:
    """Return all the XBRL disclosures from a single company (CIK) Concept (a taxonomy and tag) into a single JSON file.

    Each entry contains a separate array of facts for each units of measure that the company has chosen to disclose
    (e.g. net profits reported in U.S. dollars and in Canadian dollars).

    Parameters
    ----------
    symbol: str
        The ticker symbol to look up. Multiple symbols may be supplied as a comma-separated string;
        surrounding whitespace is ignored and duplicates are requested only once.
    fact : str
        The fact to retrieve. This should be a valid fact from the SEC taxonomy, in UpperCamelCase.
        Defaults to "Revenues".
        AAPL, MSFT, GOOG, BRK-A all report revenue as, "RevenueFromContractWithCustomerExcludingAssessedTax".
        In previous years, they may have reported as "Revenues".
    year : int, optional
        The year to retrieve the data for. If not provided, all reported values will be returned.
        If no entry matches the year, a warning is issued and all entries are returned.
    taxonomy : Literal["us-gaap", "dei", "ifrs-full", "srt"], optional
        The taxonomy to use. Defaults to "us-gaap".
    use_cache: bool
        Whether to use cache for the request. Defaults to True.

    Returns
    -------
    Dict:
        Nested dictionary with keys, "metadata" and "data".
        The "metadata" key contains information about the company concept, keyed by symbol.
        The "data" key is a list of reported values sorted by "filed" then "end", newest first.

    Raises
    ------
    EmptyDataError
        If no data was collected for any of the requested symbols.
    """
    # Normalize once: strip whitespace, drop empty entries, upper-case,
    # and de-duplicate while preserving the order supplied.
    symbols = list(
        dict.fromkeys(s.strip().upper() for s in symbol.split(",") if s.strip())
    )
    results: List[Dict] = []
    messages: List = []
    metadata: Dict = {}

    async def get_one(ticker):
        """Get data for one symbol, appending into the shared results, messages and metadata."""
        ticker = ticker.upper()
        message = f"Symbol Error: No data was found for, {ticker} and {fact}"
        cik = await symbol_map(ticker)

        if not cik:
            message = f"Symbol Error: No CIK was found for, {ticker}"
            warn(message)
            messages.append(message)
            return

        url = f"https://data.sec.gov/api/xbrl/companyconcept/CIK{cik}/{taxonomy}/{fact}.json"
        response: Union[Dict, List[Dict]] = {}

        try:
            response = await fetch_data(url, use_cache, False)
        except Exception as _:  # pylint: disable=W0718
            # Best effort per symbol: record the failure and let the other symbols proceed.
            warn(message)
            messages.append(message)
            return

        units = response.get("units", {}) if isinstance(response, dict) else {}

        if not units:
            # Without units there is nothing to report; indexing the first unit
            # would raise IndexError and abort the whole gather.
            warn(message)
            messages.append(message)
            return

        metadata[ticker] = {
            "cik": response.get("cik", ""),  # type: ignore
            "taxonomy": response.get("taxonomy", ""),  # type: ignore
            "tag": response.get("tag", ""),  # type: ignore
            "label": response.get("label", ""),  # type: ignore
            "description": response.get("description", ""),  # type: ignore
            "name": response.get("entityName", ""),  # type: ignore
            # A single unit is exposed as a string, several units as a list.
            "units": list(units) if len(units) > 1 else list(units)[0],
        }

        for unit, values in units.items():
            # Tag every reported value with its unit and the company identifiers.
            for item in values:
                item["unit"] = unit
                item["symbol"] = ticker
                item["cik"] = metadata[ticker]["cik"]
                item["name"] = metadata[ticker]["name"]
                item["fact"] = metadata[ticker]["label"]

            results.extend(values)

    await asyncio.gather(*[get_one(ticker) for ticker in symbols])

    if not results:
        raise EmptyDataError(f"{messages}")

    if year is not None:
        filtered_results = [d for d in results if str(year) == str(d.get("fy"))]
        if filtered_results:
            results = filtered_results
        else:
            warn(
                f"No results were found for {fact} in the year, {year}."
                " Returning all entries instead. Concept and fact names may differ by company and year."
            )

    return {
        "metadata": metadata,
        "data": sorted(results, key=lambda x: (x["filed"], x["end"]), reverse=True),
    }