Spaces:
Runtime error
Runtime error
cyberosa
committed on
Commit
·
8834fdb
1
Parent(s):
f26bf5c
updated live data including Friday
Browse files- data/closed_markets_div.parquet +2 -2
- data/daily_info.parquet +2 -2
- data/unknown_daily_traders.parquet +2 -2
- data/weekly_mech_calls.parquet +2 -2
- scripts/wow_retentions.py +74 -77
data/closed_markets_div.parquet
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d715e676a0779f18b69e9f549175793e6581cb9e87a456f3e8b0bc7db26190d6
|
| 3 |
+
size 48884
|
data/daily_info.parquet
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e0bf740abcffe1facea0fed126c61a19406ef32474c8dd63d85ea9448f96f701
|
| 3 |
+
size 423117
|
data/unknown_daily_traders.parquet
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1db631b6cc5b6ff1aadd6ce3285dc032fe79c83cd14bb2c1cb1fa7b7917e61b0
|
| 3 |
+
size 25139
|
data/weekly_mech_calls.parquet
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6e321e63d58f312fe2769880d9ec5ec9fba24229e427a514a3a9567936edbab5
|
| 3 |
+
size 50009
|
scripts/wow_retentions.py
CHANGED
|
@@ -4,83 +4,64 @@ from utils import DATA_DIR
|
|
| 4 |
|
| 5 |
|
| 6 |
# Basic Week over Week Retention
|
| 7 |
-
def
|
| 8 |
-
|
| 9 |
-
) -> pd.DataFrame:
|
| 10 |
-
"""Function to compute the wow retention at the week level"""
|
| 11 |
-
if trader_filter == "Olas":
|
| 12 |
-
df = traders_df.loc[traders_df["staking"] != "non_Olas"]
|
| 13 |
-
elif trader_filter == "non_Olas":
|
| 14 |
-
df = traders_df.loc[traders_df["staking"] == "non_Olas"]
|
| 15 |
-
else:
|
| 16 |
-
# unknown traders
|
| 17 |
-
print("Not implemented yet")
|
| 18 |
-
# Get unique traders per week
|
| 19 |
weekly_traders = (
|
| 20 |
-
df.groupby("month_year_week")["trader_address"]
|
|
|
|
|
|
|
| 21 |
)
|
|
|
|
| 22 |
|
| 23 |
# Calculate retention
|
| 24 |
retention = []
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
#
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
|
| 49 |
return pd.DataFrame(retention)
|
| 50 |
|
| 51 |
|
| 52 |
-
# N-Week Rolling Retention
|
| 53 |
-
def calculate_nweek_retention(df: pd.DataFrame, n_weeks=4):
|
| 54 |
-
# Get first and last trade for each trader
|
| 55 |
-
trader_activity = (
|
| 56 |
-
df.groupby("trader_address")
|
| 57 |
-
.agg({"creation_timestamp": ["min", "max"]})
|
| 58 |
-
.reset_index()
|
| 59 |
-
)
|
| 60 |
-
|
| 61 |
-
trader_activity.columns = ["trader_address", "first_trade", "last_trade"]
|
| 62 |
-
trader_activity["weeks_active"] = (
|
| 63 |
-
pd.to_datetime(trader_activity["last_trade"])
|
| 64 |
-
- pd.to_datetime(trader_activity["first_trade"])
|
| 65 |
-
).dt.days / 7
|
| 66 |
-
|
| 67 |
-
return {
|
| 68 |
-
"total_traders": len(trader_activity),
|
| 69 |
-
f"{n_weeks}_week_retained": len(
|
| 70 |
-
trader_activity[trader_activity["weeks_active"] >= n_weeks]
|
| 71 |
-
),
|
| 72 |
-
"retention_rate": (
|
| 73 |
-
len(trader_activity[trader_activity["weeks_active"] >= n_weeks])
|
| 74 |
-
/ len(trader_activity)
|
| 75 |
-
)
|
| 76 |
-
* 100,
|
| 77 |
-
}
|
| 78 |
-
|
| 79 |
-
|
| 80 |
# Cohort Retention
|
| 81 |
-
def calculate_cohort_retention(df, max_weeks=12):
|
| 82 |
# Get first week for each trader
|
| 83 |
-
# TODO check if first will retrieve the first week of the data or not
|
| 84 |
first_trades = (
|
| 85 |
df.groupby("trader_address")
|
| 86 |
.agg({"creation_timestamp": "min", "month_year_week": "first"})
|
|
@@ -119,6 +100,10 @@ def calculate_cohort_retention(df, max_weeks=12):
|
|
| 119 |
# Convert to percentages
|
| 120 |
retention_matrix = retention_matrix.div(cohort_sizes, axis=0) * 100
|
| 121 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
# Limit to max_weeks if specified
|
| 123 |
if max_weeks is not None and max_weeks < retention_matrix.shape[1]:
|
| 124 |
retention_matrix = retention_matrix.iloc[:, :max_weeks]
|
|
@@ -126,16 +111,28 @@ def calculate_cohort_retention(df, max_weeks=12):
|
|
| 126 |
return retention_matrix.round(2)
|
| 127 |
|
| 128 |
|
| 129 |
-
|
| 130 |
-
# read
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 137 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
# Usage example:
|
| 139 |
-
wow_retention =
|
| 140 |
-
|
| 141 |
-
cohort_retention = calculate_cohort_retention(trades_df)
|
|
|
|
| 4 |
|
| 5 |
|
| 6 |
# Basic Week over Week Retention
|
| 7 |
+
def _weeks_in_chronological_order(weeks: list) -> list:
    """Return the week labels ordered by the date they represent.

    Labels such as "Apr-06-2025" and "Mar-30-2025" sort alphabetically by
    month name when compared as plain strings, so they are parsed to
    timestamps before ordering. If the labels cannot be parsed as dates the
    natural ordering of the labels is used instead.
    """
    # Try the "%b-%d-%Y" label layout first, then let pandas infer the format.
    for parse_kwargs in ({"format": "%b-%d-%Y"}, {}):
        try:
            parsed = pd.to_datetime(pd.Index(weeks), **parse_kwargs)
        except (ValueError, TypeError):
            continue
        return [weeks[position] for position in parsed.argsort()]
    return sorted(weeks)


def calculate_wow_retention_by_type(df: pd.DataFrame) -> pd.DataFrame:
    """Compute week-over-week trader retention separately per trader type.

    For each trader type, the weeks in which that type traded are put in
    chronological order and every week is compared with the one right before
    it: the retention rate is the percentage of the previous week's unique
    traders that also traded in the current week.

    Args:
        df: Trades with at least the columns ``month_year_week``,
            ``trader_type`` and ``trader_address``.

    Returns:
        A DataFrame with one row per (trader type, week) pair, excluding the
        first week of each type, and the columns ``trader_type``, ``week``,
        ``retained_traders``, ``previous_traders`` and ``retention_rate``
        (a percentage rounded to two decimals). The columns are present even
        when there are no rows.
    """
    columns = [
        "trader_type",
        "week",
        "retained_traders",
        "previous_traders",
        "retention_rate",
    ]
    retention = []

    # Iterate through each trader type (groupby yields them in sorted order)
    for trader_type, type_trades in df.groupby("trader_type"):
        # Collect the unique traders of every week once, instead of
        # re-filtering the whole frame for every pair of weeks.
        traders_by_week = {
            week: set(addresses)
            for week, addresses in type_trades.groupby("month_year_week")[
                "trader_address"
            ]
        }
        weeks = _weeks_in_chronological_order(list(traders_by_week))

        # Compare each week with the chronologically preceding one
        for previous_week, current_week in zip(weeks, weeks[1:]):
            current_traders = traders_by_week[current_week]
            previous_traders = traders_by_week[previous_week]

            retained = len(current_traders & previous_traders)
            retention_rate = (
                (retained / len(previous_traders)) * 100 if previous_traders else 0
            )

            retention.append(
                {
                    "trader_type": trader_type,
                    "week": current_week,
                    "retained_traders": retained,
                    "previous_traders": len(previous_traders),
                    "retention_rate": round(retention_rate, 2),
                }
            )

    return pd.DataFrame(retention, columns=columns)
|
| 60 |
|
| 61 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
# Cohort Retention
|
| 63 |
+
def calculate_cohort_retention(df, max_weeks=12) -> pd.DataFrame:
|
| 64 |
# Get first week for each trader
|
|
|
|
| 65 |
first_trades = (
|
| 66 |
df.groupby("trader_address")
|
| 67 |
.agg({"creation_timestamp": "min", "month_year_week": "first"})
|
|
|
|
| 100 |
# Convert to percentages
|
| 101 |
retention_matrix = retention_matrix.div(cohort_sizes, axis=0) * 100
|
| 102 |
|
| 103 |
+
# Sort index (cohort_week) chronologically
|
| 104 |
+
retention_matrix.index = pd.to_datetime(retention_matrix.index)
|
| 105 |
+
retention_matrix = retention_matrix.sort_index()
|
| 106 |
+
|
| 107 |
# Limit to max_weeks if specified
|
| 108 |
if max_weeks is not None and max_weeks < retention_matrix.shape[1]:
|
| 109 |
retention_matrix = retention_matrix.iloc[:, :max_weeks]
|
|
|
|
| 111 |
return retention_matrix.round(2)
|
| 112 |
|
| 113 |
|
| 114 |
+
def prepare_retention_dataset() -> pd.DataFrame:
    """Load and merge the trades datasets used by the retention functions.

    Reads ``all_trades_profitability.parquet`` and ``unknown_traders.parquet``
    from ``DATA_DIR``, tags every row with a ``trader_type``
    ("Olas", "non_Olas" or "unclassified"), concatenates both frames, sorts
    them by ``creation_timestamp`` and adds a ``month_year_week`` label built
    from the weekly period of that timestamp, formatted as ``%b-%d-%Y``.

    Returns:
        The combined trades DataFrame, sorted by ``creation_timestamp``.
    """

    def _trader_type_from_staking(staking_value):
        # Anything that is not explicitly "non_Olas" is treated as "Olas".
        if staking_value == "non_Olas":
            return "non_Olas"
        return "Olas"

    # read all datasets
    known_trades = pd.read_parquet(DATA_DIR / "all_trades_profitability.parquet")
    unknown_trades = pd.read_parquet(DATA_DIR / "unknown_traders.parquet")

    # tag each source with its trader type
    known_trades["trader_type"] = known_trades["staking"].apply(
        _trader_type_from_staking
    )
    unknown_trades["trader_type"] = "unclassified"

    combined = pd.concat([known_trades, unknown_trades], ignore_index=True)

    # normalise the timestamp, order by it and derive the weekly label
    combined["creation_timestamp"] = pd.to_datetime(combined["creation_timestamp"])
    combined = combined.sort_values(by="creation_timestamp", ascending=True)
    weekly_periods = combined["creation_timestamp"].dt.to_period("W")
    combined["month_year_week"] = weekly_periods.dt.strftime("%b-%d-%Y")
    return combined
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
if __name__ == "__main__":
    # Build the combined trades dataset once and feed it to both retention
    # computations below.
    all_traders = prepare_retention_dataset()
    # Usage example:
    wow_retention = calculate_wow_retention_by_type(all_traders)
    cohort_retention = calculate_cohort_retention(all_traders)
|
|
|