File size: 1,941 Bytes
a84c47e 561706e a84c47e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 |
import pandas as pd
import numpy as np
import datetime
def create_mock_data(
    filename="marketsession_post_polygon_2020-01-01_2026-01-01.parquet_with_premarketvolume900K_marketcap1B.parquet",
    seed=None,
):
    """Write a small synthetic market-session dataset to a Parquet file.

    One row is generated per (day, ticker) pair for every calendar day from
    2024-10-01 through 2024-11-01 and five fixed tickers. Values are
    randomized so that rows usually satisfy the filters
    ``premarket_change_from_perviousday_perc > 8``, ``premarket_close > 2``,
    ``Shares Float > 1e6`` and ``Market Capitalization < 100e6``.

    Args:
        filename: Destination path handed to ``DataFrame.to_parquet``.
            Defaults to the file name this script has always written.
        seed: Optional integer seed. When given, all random draws come from
            a private ``numpy.random.RandomState(seed)`` so the output is
            reproducible. When ``None`` (default), NumPy's global random
            state is used, as before.

    Returns:
        None. The data is written to ``filename`` and a confirmation line
        is printed.
    """
    # Both ``np.random`` (module, global state) and ``RandomState`` expose
    # ``randn()``, so the rest of the function does not care which is used.
    rng = np.random if seed is None else np.random.RandomState(seed)

    dates = pd.date_range(start="2024-10-01", end="2024-11-01", freq="D")
    tickers = ["AAPL", "TSLA", "AMZN", "GOOG", "MSFT"]

    # (N in the ``marketsession_<N>min`` column name, noise scale applied
    # to the entry price). Order matters: it fixes both the column order
    # and the order of random draws.
    minute_noise = (
        (1, 0.01),
        (3, 0.02),
        (5, 0.03),
        (10, 0.04),
        (15, 0.05),
        (30, 0.06),
        (60, 0.07),
        (120, 0.08),
    )

    rows = []
    for d in dates:
        for t in tickers:
            # Entry price is centred on 10 so it stays above the
            # ``premarket_close > 2`` filter threshold.
            entry = 10.0 + rng.randn()
            rows.append(
                {
                    "datetime": d,
                    "Ticker": t,
                    "premarket_change_from_perviousday_perc": 10.0 + rng.randn(),
                    "premarket_close": entry,
                    "Shares Float": 2e6,
                    "Market Capitalization": 50e6,
                    **{
                        f"marketsession_{minutes}min": entry * (1 - scale * rng.randn())
                        for minutes, scale in minute_noise
                    },
                    "marketsession_high": entry * 1.1,
                    "marketsession_close": entry * 0.9,
                }
            )

    df = pd.DataFrame(rows)
    df.to_parquet(filename)
    print(f"Mock data created: {filename}")
# Script entry point: generate the mock Parquet file only when this module
# is executed directly, not when it is imported.
if __name__ == "__main__":
    create_mock_data()
|