|
|
"""Helper functions for Quantitative Analysis.""" |
|
|
|
|
|
from typing import TYPE_CHECKING, Union |
|
|
|
|
|
if TYPE_CHECKING: |
|
|
from pandas import DataFrame, Series |
|
|
|
|
|
|
|
|
|
|
|
def get_fama_raw(start_date: str, end_date: str) -> "DataFrame": |
|
|
"""Get base Fama French data to calculate risk. |
|
|
|
|
|
Returns |
|
|
------- |
|
|
DataFrame |
|
|
A data with fama french model information |
|
|
""" |
|
|
|
|
|
from io import BytesIO |
|
|
from urllib.request import urlopen |
|
|
from zipfile import ZipFile |
|
|
|
|
|
from pandas import read_csv, to_datetime, to_numeric |
|
|
|
|
|
with urlopen( |
|
|
"https://mba.tuck.dartmouth.edu/pages/faculty/ken.french/ftp/F-F_Research_Data_Factors_CSV.zip" |
|
|
) as url: |
|
|
|
|
|
with ZipFile(BytesIO(url.read())) as zipfile: |
|
|
with zipfile.open("F-F_Research_Data_Factors.CSV") as zip_open: |
|
|
df = read_csv( |
|
|
zip_open, |
|
|
header=0, |
|
|
names=["Date", "MKT-RF", "SMB", "HML", "RF"], |
|
|
skiprows=3, |
|
|
) |
|
|
|
|
|
df = df[df["Date"].apply(lambda x: len(str(x).strip()) == 6)] |
|
|
df["Date"] = df["Date"].astype(str) + "01" |
|
|
df["Date"] = to_datetime(df["Date"], format="%Y%m%d") |
|
|
df["MKT-RF"] = to_numeric(df["MKT-RF"], downcast="float") |
|
|
df["SMB"] = to_numeric(df["SMB"], downcast="float") |
|
|
df["HML"] = to_numeric(df["HML"], downcast="float") |
|
|
df["RF"] = to_numeric(df["RF"], downcast="float") |
|
|
df["MKT-RF"] = df["MKT-RF"] / 100 |
|
|
df["SMB"] = df["SMB"] / 100 |
|
|
df["HML"] = df["HML"] / 100 |
|
|
df["RF"] = df["RF"] / 100 |
|
|
df = df.set_index("Date") |
|
|
|
|
|
dt_start_date = to_datetime(start_date, format="%Y-%m-%d") |
|
|
if dt_start_date > df.index.max(): |
|
|
raise ValueError( |
|
|
f"Start date '{dt_start_date}' is after the last date available for Fama-French '{df.index[-1]}'" |
|
|
) |
|
|
|
|
|
df = df.loc[start_date:end_date] |
|
|
|
|
|
return df |
|
|
|
|
|
|
|
|
def validate_window(input_data: Union["Series", "DataFrame"], window: int) -> None: |
|
|
"""Validate the window input. |
|
|
|
|
|
Parameters |
|
|
---------- |
|
|
input_data : Union[Series, DataFrame] |
|
|
The input data to be validated. |
|
|
window : int |
|
|
The window to be validated. |
|
|
|
|
|
Raises |
|
|
------ |
|
|
ValueError |
|
|
If the window is greater than the input data length. |
|
|
""" |
|
|
if window > len(input_data): |
|
|
raise ValueError( |
|
|
f"Window '{window}' is greater than the input data length '{len(input_data)}'" |
|
|
) |
|
|
|