use parquet instead of csv
Browse files- app.py +4 -4
- data/{all_trades_profitability.csv → all_trades_profitability.parquet} +2 -2
- data/{fpmmTrades.csv → fpmmTrades.parquet} +2 -2
- data/{fpmms.csv → fpmms.parquet} +2 -2
- data/requests.csv +0 -3
- data/requests.parquet +3 -0
- data/{summary_profitability.csv → summary_profitability.parquet} +2 -2
- data/t_map.pkl +2 -2
- increase_zero_mech_calls.ipynb +0 -0
- scripts/markets.py +3 -2
- scripts/profitability.py +19 -15
- scripts/pull_data.py +5 -5
- scripts/tools.py +25 -20
- test.ipynb +0 -0
app.py
CHANGED
|
@@ -106,8 +106,8 @@ def refresh_data():
|
|
| 106 |
|
| 107 |
logging.info("Refreshing data...")
|
| 108 |
|
| 109 |
-
tools_df = pd.
|
| 110 |
-
trades_df = pd.
|
| 111 |
trades_df = prepare_trades(trades_df)
|
| 112 |
error_df = get_error_data(tools_df=tools_df, inc_tools=INC_TOOLS)
|
| 113 |
error_overall_df = get_error_data_overall(error_df=error_df)
|
|
@@ -134,8 +134,8 @@ def pull_refresh_data():
|
|
| 134 |
refresh_data()
|
| 135 |
|
| 136 |
|
| 137 |
-
tools_df = pd.
|
| 138 |
-
trades_df = pd.
|
| 139 |
trades_df = prepare_trades(trades_df)
|
| 140 |
|
| 141 |
|
|
|
|
| 106 |
|
| 107 |
logging.info("Refreshing data...")
|
| 108 |
|
| 109 |
+
tools_df = pd.read_parquet("./data/tools.parquet")
|
| 110 |
+
trades_df = pd.read_parquet("./data/all_trades_profitability.parquet")
|
| 111 |
trades_df = prepare_trades(trades_df)
|
| 112 |
error_df = get_error_data(tools_df=tools_df, inc_tools=INC_TOOLS)
|
| 113 |
error_overall_df = get_error_data_overall(error_df=error_df)
|
|
|
|
| 134 |
refresh_data()
|
| 135 |
|
| 136 |
|
| 137 |
+
tools_df = pd.read_parquet("./data/tools.parquet")
|
| 138 |
+
trades_df = pd.read_parquet("./data/all_trades_profitability.parquet")
|
| 139 |
trades_df = prepare_trades(trades_df)
|
| 140 |
|
| 141 |
|
data/{all_trades_profitability.csv → all_trades_profitability.parquet}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ae0de6d7e607b8ac33140081ab5415b9c16e7359d23b196e555535af0d78965c
|
| 3 |
+
size 8251611
|
data/{fpmmTrades.csv → fpmmTrades.parquet}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb0cd005a2bb7b37b04e0388538249ab6434c9de532b337fcee775ab9205064c
|
| 3 |
+
size 20528876
|
data/{fpmms.csv → fpmms.parquet}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b0b82cf173571152d11bbcabd94f675e8d84c148925f47a96c5192d9b9e2f67
|
| 3 |
+
size 319767
|
data/requests.csv
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:861e85f3437c0c75001e8b10731b91c643f0e0ef0bab214257c26d2a25fa9628
|
| 3 |
-
size 168361105
|
|
|
|
|
|
|
|
|
|
|
|
data/requests.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a3de88b6c91037ed4245a60473dcca4dce395d1583ec5cb39f79ab0e42759904
|
| 3 |
+
size 46486507
|
data/{summary_profitability.csv → summary_profitability.parquet}
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0ef6d6a03b5f872d0228881b74e3a2427c4e8a5f7fd02776eb70683605ccbb4b
|
| 3 |
+
size 52394
|
data/t_map.pkl
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2738a5a8e98ca83c409251237cc338ed540c0ea58779bf23ea59255fa88b42d5
|
| 3 |
+
size 7749840
|
increase_zero_mech_calls.ipynb
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
scripts/markets.py
CHANGED
|
@@ -46,7 +46,7 @@ QUESTION_FIELD = "question"
|
|
| 46 |
OUTCOMES_FIELD = "outcomes"
|
| 47 |
TITLE_FIELD = "title"
|
| 48 |
MAX_UINT_HEX = "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
|
| 49 |
-
DEFAULT_FILENAME = "fpmms.
|
| 50 |
SCRIPTS_DIR = Path(__file__).parent
|
| 51 |
ROOT_DIR = SCRIPTS_DIR.parent
|
| 52 |
DATA_DIR = ROOT_DIR / "data"
|
|
@@ -218,10 +218,11 @@ def etl(filename: Optional[str] = None) -> pd.DataFrame:
|
|
| 218 |
fpmms = transform_fpmms(fpmms)
|
| 219 |
|
| 220 |
if filename:
|
| 221 |
-
fpmms.
|
| 222 |
|
| 223 |
return fpmms
|
| 224 |
|
| 225 |
|
| 226 |
if __name__ == "__main__":
|
| 227 |
etl(DEFAULT_FILENAME)
|
|
|
|
|
|
| 46 |
OUTCOMES_FIELD = "outcomes"
|
| 47 |
TITLE_FIELD = "title"
|
| 48 |
MAX_UINT_HEX = "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
|
| 49 |
+
DEFAULT_FILENAME = "fpmms.parquet"
|
| 50 |
SCRIPTS_DIR = Path(__file__).parent
|
| 51 |
ROOT_DIR = SCRIPTS_DIR.parent
|
| 52 |
DATA_DIR = ROOT_DIR / "data"
|
|
|
|
| 218 |
fpmms = transform_fpmms(fpmms)
|
| 219 |
|
| 220 |
if filename:
|
| 221 |
+
fpmms.to_parquet(DATA_DIR / filename, index=False)
|
| 222 |
|
| 223 |
return fpmms
|
| 224 |
|
| 225 |
|
| 226 |
if __name__ == "__main__":
|
| 227 |
etl(DEFAULT_FILENAME)
|
| 228 |
+
|
scripts/profitability.py
CHANGED
|
@@ -385,7 +385,7 @@ def create_fpmmTrades(rpc: str):
|
|
| 385 |
df.rename(columns={"creator": "trader_address"}, inplace=True)
|
| 386 |
|
| 387 |
# save to csv
|
| 388 |
-
df.
|
| 389 |
|
| 390 |
return df
|
| 391 |
|
|
@@ -396,7 +396,7 @@ def prepare_profitalibity_data(rpc: str):
|
|
| 396 |
# Check if tools.py is in the same directory
|
| 397 |
try:
|
| 398 |
# load tools.csv
|
| 399 |
-
tools = pd.
|
| 400 |
|
| 401 |
# make sure creator_address is in the columns
|
| 402 |
assert "trader_address" in tools.columns, "trader_address column not found"
|
|
@@ -407,21 +407,21 @@ def prepare_profitalibity_data(rpc: str):
|
|
| 407 |
# drop duplicates
|
| 408 |
tools.drop_duplicates(inplace=True)
|
| 409 |
|
| 410 |
-
print("tools.
|
| 411 |
except FileNotFoundError:
|
| 412 |
-
print("tools.
|
| 413 |
return
|
| 414 |
|
| 415 |
# Check if fpmmTrades.csv is in the same directory
|
| 416 |
try:
|
| 417 |
# load fpmmTrades.csv
|
| 418 |
-
fpmmTrades = pd.
|
| 419 |
-
print("fpmmTrades.
|
| 420 |
except FileNotFoundError:
|
| 421 |
-
print("fpmmTrades.
|
| 422 |
fpmmTrades = create_fpmmTrades(rpc)
|
| 423 |
-
fpmmTrades.
|
| 424 |
-
fpmmTrades = pd.
|
| 425 |
|
| 426 |
# make sure trader_address is in the columns
|
| 427 |
assert "trader_address" in fpmmTrades.columns, "trader_address column not found"
|
|
@@ -434,13 +434,13 @@ def prepare_profitalibity_data(rpc: str):
|
|
| 434 |
|
| 435 |
def determine_market_status(trade, current_answer):
|
| 436 |
"""Determine the market status of a trade."""
|
| 437 |
-
if current_answer is np.nan and time.time() >= trade["fpmm.openingTimestamp"]:
|
| 438 |
return MarketState.PENDING
|
| 439 |
elif current_answer == np.nan:
|
| 440 |
return MarketState.OPEN
|
| 441 |
elif trade["fpmm.isPendingArbitration"]:
|
| 442 |
return MarketState.ARBITRATING
|
| 443 |
-
elif time.time() < trade["fpmm.answerFinalizedTimestamp"]:
|
| 444 |
return MarketState.FINALIZING
|
| 445 |
return MarketState.CLOSED
|
| 446 |
|
|
@@ -468,9 +468,12 @@ def analyse_trader(
|
|
| 468 |
# Iterate over the trades
|
| 469 |
for i, trade in tqdm(trades.iterrows(), total=len(trades), desc="Analysing trades"):
|
| 470 |
try:
|
|
|
|
|
|
|
|
|
|
| 471 |
# Parsing and computing shared values
|
| 472 |
creation_timestamp_utc = datetime.datetime.fromtimestamp(
|
| 473 |
-
trade["creationTimestamp"], tz=datetime.timezone.utc
|
| 474 |
)
|
| 475 |
collateral_amount = wei_to_unit(float(trade["collateralAmount"]))
|
| 476 |
fee_amount = wei_to_unit(float(trade["feeAmount"]))
|
|
@@ -497,7 +500,7 @@ def analyse_trader(
|
|
| 497 |
if is_invalid:
|
| 498 |
earnings = collateral_amount
|
| 499 |
winner_trade = False
|
| 500 |
-
elif trade["outcomeIndex"] == current_answer:
|
| 501 |
earnings = outcome_tokens_traded
|
| 502 |
winner_trade = True
|
| 503 |
|
|
@@ -610,6 +613,7 @@ def run_profitability_analysis(rpc):
|
|
| 610 |
# load dfs from csv for analysis
|
| 611 |
print("Preparing data...")
|
| 612 |
fpmmTrades, tools = prepare_profitalibity_data(rpc)
|
|
|
|
| 613 |
|
| 614 |
# all trades profitability df
|
| 615 |
print("Analysing trades...")
|
|
@@ -620,8 +624,8 @@ def run_profitability_analysis(rpc):
|
|
| 620 |
summary_df = summary_analyse(all_trades_df)
|
| 621 |
|
| 622 |
# save to csv
|
| 623 |
-
all_trades_df.
|
| 624 |
-
summary_df.
|
| 625 |
|
| 626 |
print("Done!")
|
| 627 |
|
|
|
|
| 385 |
df.rename(columns={"creator": "trader_address"}, inplace=True)
|
| 386 |
|
| 387 |
# save to csv
|
| 388 |
+
df.to_parquet(DATA_DIR / "fpmmTrades.parquet", index=False)
|
| 389 |
|
| 390 |
return df
|
| 391 |
|
|
|
|
| 396 |
# Check if tools.py is in the same directory
|
| 397 |
try:
|
| 398 |
# load tools.csv
|
| 399 |
+
tools = pd.read_parquet(DATA_DIR / "tools.parquet")
|
| 400 |
|
| 401 |
# make sure creator_address is in the columns
|
| 402 |
assert "trader_address" in tools.columns, "trader_address column not found"
|
|
|
|
| 407 |
# drop duplicates
|
| 408 |
tools.drop_duplicates(inplace=True)
|
| 409 |
|
| 410 |
+
print("tools.parquet loaded")
|
| 411 |
except FileNotFoundError:
|
| 412 |
+
print("tools.parquet not found. Please run tools.py first.")
|
| 413 |
return
|
| 414 |
|
| 415 |
# Check if fpmmTrades.csv is in the same directory
|
| 416 |
try:
|
| 417 |
# load fpmmTrades.csv
|
| 418 |
+
fpmmTrades = pd.read_parquet(DATA_DIR / "fpmmTrades.parquet")
|
| 419 |
+
print("fpmmTrades.parquet loaded")
|
| 420 |
except FileNotFoundError:
|
| 421 |
+
print("fpmmTrades.parquet not found. Creating fpmmTrades.parquet...")
|
| 422 |
fpmmTrades = create_fpmmTrades(rpc)
|
| 423 |
+
fpmmTrades.to_parquet(DATA_DIR / "fpmmTrades.parquet", index=False)
|
| 424 |
+
fpmmTrades = pd.read_parquet(DATA_DIR / "fpmmTrades.parquet")
|
| 425 |
|
| 426 |
# make sure trader_address is in the columns
|
| 427 |
assert "trader_address" in fpmmTrades.columns, "trader_address column not found"
|
|
|
|
| 434 |
|
| 435 |
def determine_market_status(trade, current_answer):
|
| 436 |
"""Determine the market status of a trade."""
|
| 437 |
+
if current_answer is np.nan and time.time() >= int(trade["fpmm.openingTimestamp"]):
|
| 438 |
return MarketState.PENDING
|
| 439 |
elif current_answer == np.nan:
|
| 440 |
return MarketState.OPEN
|
| 441 |
elif trade["fpmm.isPendingArbitration"]:
|
| 442 |
return MarketState.ARBITRATING
|
| 443 |
+
elif time.time() < int(trade["fpmm.answerFinalizedTimestamp"]):
|
| 444 |
return MarketState.FINALIZING
|
| 445 |
return MarketState.CLOSED
|
| 446 |
|
|
|
|
| 468 |
# Iterate over the trades
|
| 469 |
for i, trade in tqdm(trades.iterrows(), total=len(trades), desc="Analysing trades"):
|
| 470 |
try:
|
| 471 |
+
if not trade['fpmm.currentAnswer']:
|
| 472 |
+
print(f"Skipping trade {i} because currentAnswer is NaN")
|
| 473 |
+
continue
|
| 474 |
# Parsing and computing shared values
|
| 475 |
creation_timestamp_utc = datetime.datetime.fromtimestamp(
|
| 476 |
+
int(trade["creationTimestamp"]), tz=datetime.timezone.utc
|
| 477 |
)
|
| 478 |
collateral_amount = wei_to_unit(float(trade["collateralAmount"]))
|
| 479 |
fee_amount = wei_to_unit(float(trade["feeAmount"]))
|
|
|
|
| 500 |
if is_invalid:
|
| 501 |
earnings = collateral_amount
|
| 502 |
winner_trade = False
|
| 503 |
+
elif int(trade["outcomeIndex"]) == current_answer:
|
| 504 |
earnings = outcome_tokens_traded
|
| 505 |
winner_trade = True
|
| 506 |
|
|
|
|
| 613 |
# load dfs from csv for analysis
|
| 614 |
print("Preparing data...")
|
| 615 |
fpmmTrades, tools = prepare_profitalibity_data(rpc)
|
| 616 |
+
tools['trader_address'] = tools['trader_address'].str.lower()
|
| 617 |
|
| 618 |
# all trades profitability df
|
| 619 |
print("Analysing trades...")
|
|
|
|
| 624 |
summary_df = summary_analyse(all_trades_df)
|
| 625 |
|
| 626 |
# save to csv
|
| 627 |
+
all_trades_df.to_parquet(DATA_DIR / "all_trades_profitability.parquet", index=False)
|
| 628 |
+
summary_df.to_parquet(DATA_DIR / "summary_profitability.parquet", index=False)
|
| 629 |
|
| 630 |
print("Done!")
|
| 631 |
|
scripts/pull_data.py
CHANGED
|
@@ -85,16 +85,16 @@ def weekly_analysis():
|
|
| 85 |
|
| 86 |
# Run profitability analysis
|
| 87 |
logging.info("Running profitability analysis")
|
| 88 |
-
if os.path.exists(DATA_DIR / "fpmmTrades.
|
| 89 |
-
os.remove(DATA_DIR / "fpmmTrades.
|
| 90 |
run_profitability_analysis(
|
| 91 |
rpc=rpc,
|
| 92 |
)
|
| 93 |
logging.info("Profitability analysis completed")
|
| 94 |
|
| 95 |
# Get currentAnswer from FPMMS
|
| 96 |
-
fpmms = pd.
|
| 97 |
-
tools = pd.
|
| 98 |
|
| 99 |
# Get the question from the tools
|
| 100 |
logging.info("Getting the question and current answer for the tools")
|
|
@@ -123,7 +123,7 @@ def weekly_analysis():
|
|
| 123 |
tools['request_month_year_week'] = pd.to_datetime(tools['request_time']).dt.to_period('W').astype(str)
|
| 124 |
|
| 125 |
# Save the tools
|
| 126 |
-
tools.
|
| 127 |
|
| 128 |
# Update t_map with new timestamps
|
| 129 |
new_timestamps = tools[['request_block', 'request_time']].dropna().set_index('request_block').to_dict()['request_time']
|
|
|
|
| 85 |
|
| 86 |
# Run profitability analysis
|
| 87 |
logging.info("Running profitability analysis")
|
| 88 |
+
if os.path.exists(DATA_DIR / "fpmmTrades.parquet"):
|
| 89 |
+
os.remove(DATA_DIR / "fpmmTrades.parquet")
|
| 90 |
run_profitability_analysis(
|
| 91 |
rpc=rpc,
|
| 92 |
)
|
| 93 |
logging.info("Profitability analysis completed")
|
| 94 |
|
| 95 |
# Get currentAnswer from FPMMS
|
| 96 |
+
fpmms = pd.read_parquet(DATA_DIR / MARKETS_FILENAME)
|
| 97 |
+
tools = pd.read_parquet(DATA_DIR / TOOLS_FILENAME)
|
| 98 |
|
| 99 |
# Get the question from the tools
|
| 100 |
logging.info("Getting the question and current answer for the tools")
|
|
|
|
| 123 |
tools['request_month_year_week'] = pd.to_datetime(tools['request_time']).dt.to_period('W').astype(str)
|
| 124 |
|
| 125 |
# Save the tools
|
| 126 |
+
tools.to_parquet(DATA_DIR / TOOLS_FILENAME, index=False)
|
| 127 |
|
| 128 |
# Update t_map with new timestamps
|
| 129 |
new_timestamps = tools[['request_block', 'request_time']].dropna().set_index('request_block').to_dict()['request_time']
|
scripts/tools.py
CHANGED
|
@@ -86,7 +86,7 @@ IPFS_ADDRESS = f"{HTTPS}gateway.autonolas.tech/ipfs/"
|
|
| 86 |
IPFS_LINKS_SERIES_NAME = "ipfs_links"
|
| 87 |
BACKOFF_FACTOR = 1
|
| 88 |
STATUS_FORCELIST = [404, 500, 502, 503, 504]
|
| 89 |
-
DEFAULT_FILENAME = "tools.
|
| 90 |
RE_RPC_FILTER_ERROR = r"Filter with id: '\d+' does not exist."
|
| 91 |
ABI_ERROR = "The event signature did not match the provided ABI"
|
| 92 |
SLEEP = 0.5
|
|
@@ -580,7 +580,7 @@ def transform_deliver(contents: pd.DataFrame, full_contents=False) -> pd.DataFra
|
|
| 580 |
|
| 581 |
def gen_event_filename(event_name: MechEventName) -> str:
|
| 582 |
"""Generate the filename of an event."""
|
| 583 |
-
return f"{event_name.value.lower()}s.
|
| 584 |
|
| 585 |
|
| 586 |
def read_n_last_lines(filename: str, n: int = 1) -> str:
|
|
@@ -605,33 +605,38 @@ def get_earliest_block(event_name: MechEventName) -> int:
|
|
| 605 |
if not os.path.exists(DATA_DIR / filename):
|
| 606 |
return 0
|
| 607 |
|
| 608 |
-
|
| 609 |
-
last_line_buff = StringIO(read_n_last_lines(DATA_DIR/filename))
|
| 610 |
-
last_line_series = pd.read_csv(last_line_buff, names=cols)
|
| 611 |
block_field = f"{event_name.value.lower()}_{BLOCK_FIELD}"
|
| 612 |
-
return int(
|
| 613 |
|
| 614 |
|
| 615 |
def store_progress(
|
| 616 |
filename: str,
|
| 617 |
-
event_to_contents: Dict[
|
| 618 |
tools: pd.DataFrame,
|
| 619 |
) -> None:
|
| 620 |
"""Store the given progress."""
|
| 621 |
if filename:
|
|
|
|
| 622 |
for event_name, content in event_to_contents.items():
|
| 623 |
-
event_filename = gen_event_filename(event_name)
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 635 |
|
| 636 |
|
| 637 |
def etl(
|
|
@@ -736,7 +741,7 @@ def etl(
|
|
| 736 |
events_filename = gen_event_filename(event_name)
|
| 737 |
|
| 738 |
if os.path.exists(DATA_DIR / events_filename):
|
| 739 |
-
old = pd.
|
| 740 |
|
| 741 |
# Reset index to avoid index conflicts
|
| 742 |
old.reset_index(drop=True, inplace=True)
|
|
|
|
| 86 |
IPFS_LINKS_SERIES_NAME = "ipfs_links"
|
| 87 |
BACKOFF_FACTOR = 1
|
| 88 |
STATUS_FORCELIST = [404, 500, 502, 503, 504]
|
| 89 |
+
DEFAULT_FILENAME = "tools.parquet"
|
| 90 |
RE_RPC_FILTER_ERROR = r"Filter with id: '\d+' does not exist."
|
| 91 |
ABI_ERROR = "The event signature did not match the provided ABI"
|
| 92 |
SLEEP = 0.5
|
|
|
|
| 580 |
|
| 581 |
def gen_event_filename(event_name: MechEventName) -> str:
|
| 582 |
"""Generate the filename of an event."""
|
| 583 |
+
return f"{event_name.value.lower()}s.parquet"
|
| 584 |
|
| 585 |
|
| 586 |
def read_n_last_lines(filename: str, n: int = 1) -> str:
|
|
|
|
| 605 |
if not os.path.exists(DATA_DIR / filename):
|
| 606 |
return 0
|
| 607 |
|
| 608 |
+
df = pd.read_parquet(DATA_DIR / filename)
|
|
|
|
|
|
|
| 609 |
block_field = f"{event_name.value.lower()}_{BLOCK_FIELD}"
|
| 610 |
+
return int(df[block_field].max())
|
| 611 |
|
| 612 |
|
| 613 |
def store_progress(
|
| 614 |
filename: str,
|
| 615 |
+
event_to_contents: Dict[str, pd.DataFrame],
|
| 616 |
tools: pd.DataFrame,
|
| 617 |
) -> None:
|
| 618 |
"""Store the given progress."""
|
| 619 |
if filename:
|
| 620 |
+
DATA_DIR.mkdir(parents=True, exist_ok=True) # Ensure the directory exists
|
| 621 |
for event_name, content in event_to_contents.items():
|
| 622 |
+
event_filename = gen_event_filename(event_name) # Ensure this function returns a valid filename string
|
| 623 |
+
try:
|
| 624 |
+
if "result" in content.columns:
|
| 625 |
+
content = content.drop(columns=["result"]) # Avoid in-place modification
|
| 626 |
+
if 'error' in content.columns:
|
| 627 |
+
content['error'] = content['error'].astype(bool)
|
| 628 |
+
content.to_parquet(DATA_DIR / event_filename, index=False)
|
| 629 |
+
except Exception as e:
|
| 630 |
+
print(f"Failed to write {event_name}: {e}")
|
| 631 |
+
# Drop result and error columns for tools DataFrame
|
| 632 |
+
try:
|
| 633 |
+
if "result" in tools.columns:
|
| 634 |
+
tools = tools.drop(columns=["result"])
|
| 635 |
+
if 'error' in tools.columns:
|
| 636 |
+
tools['error'] = tools['error'].astype(bool)
|
| 637 |
+
tools.to_parquet(DATA_DIR / filename, index=False)
|
| 638 |
+
except Exception as e:
|
| 639 |
+
print(f"Failed to write tools data: {e}")
|
| 640 |
|
| 641 |
|
| 642 |
def etl(
|
|
|
|
| 741 |
events_filename = gen_event_filename(event_name)
|
| 742 |
|
| 743 |
if os.path.exists(DATA_DIR / events_filename):
|
| 744 |
+
old = pd.read_parquet(DATA_DIR / events_filename)
|
| 745 |
|
| 746 |
# Reset index to avoid index conflicts
|
| 747 |
old.reset_index(drop=True, inplace=True)
|
test.ipynb
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|