"""Generate a small mock market-session dataset and store it as parquet."""

import datetime
from typing import Optional

import numpy as np
import pandas as pd

# Default output path; kept byte-identical to the name the original script wrote.
MOCK_DATA_FILENAME = (
    "marketsession_post_polygon_2020-01-01_2026-01-01"
    ".parquet_with_premarketvolume900K_marketcap1B.parquet"
)

_TICKERS = ("AAPL", "TSLA", "AMZN", "GOOG", "MSFT")

# (column name, noise scale) for each intraday snapshot. Every snapshot is the
# entry price multiplied by (1 - scale * standard-normal draw).
_SESSION_NOISE = (
    ("marketsession_1min", 0.01),
    ("marketsession_3min", 0.02),
    ("marketsession_5min", 0.03),
    ("marketsession_10min", 0.04),
    ("marketsession_15min", 0.05),
    ("marketsession_30min", 0.06),
    ("marketsession_60min", 0.07),
    ("marketsession_120min", 0.08),
)


def create_mock_data(
    filename: Optional[str] = MOCK_DATA_FILENAME,
    seed: Optional[int] = None,
) -> pd.DataFrame:
    """Build one mock row per (day, ticker) and optionally write it to parquet.

    Rows cover every calendar day from 2024-10-01 through 2024-11-01
    (inclusive) for five fixed tickers.

    Args:
        filename: Destination parquet path. Pass ``None`` to skip writing (and
            the confirmation message) and only get the frame back.
        seed: When given, values are drawn from a private
            ``np.random.RandomState(seed)`` so the output is reproducible.
            When ``None`` the global ``np.random`` state is used, exactly as
            the original script did.

    Returns:
        The generated DataFrame.
    """
    # Both the ``np.random`` module and ``RandomState`` expose ``randn``; using
    # the module when no seed is given keeps the legacy global-stream behavior.
    rng = np.random if seed is None else np.random.RandomState(seed)

    days = pd.date_range(start="2024-10-01", end="2024-11-01", freq="D")

    rows = []
    for day in days:
        for ticker in _TICKERS:
            # Values are randomized so that rows meet these filters most of
            # the time:
            #   premarket_change_from_perviousday_perc > 8
            #   and premarket_close > 2
            #   and `Shares Float` > 1e6
            #   and `Market Capitalization` < 100e6
            entry = 10.0 + rng.randn()
            row = {
                "datetime": day,
                "Ticker": ticker,
                "premarket_change_from_perviousday_perc": 10.0 + rng.randn(),
                "premarket_close": entry,
                "Shares Float": 2e6,
                "Market Capitalization": 50e6,
            }
            # One fresh draw per snapshot, taken in column order.
            for column, scale in _SESSION_NOISE:
                row[column] = entry * (1 - scale * rng.randn())
            row["marketsession_high"] = entry * 1.1
            row["marketsession_close"] = entry * 0.9
            rows.append(row)

    df = pd.DataFrame(rows)

    if filename is not None:
        df.to_parquet(filename)
        print(f"Mock data created: {filename}")

    return df


if __name__ == "__main__":
    create_mock_data()