File size: 1,941 Bytes
a84c47e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
561706e
a84c47e
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import pandas as pd
import numpy as np
import datetime


def create_mock_data():
    dates = pd.date_range(start="2024-10-01", end="2024-11-01", freq="D")
    tickers = ["AAPL", "TSLA", "AMZN", "GOOG", "MSFT"]

    rows = []
    for d in dates:
        for t in tickers:
            # Randomize values to meet filters most of the time
            entry = 10.0 + np.random.randn()
            # columns: premarket_change_from_perviousday_perc > 8 and premarket_close > 2 and `Shares Float`>1e6 and `Market Capitalization`<100e6
            rows.append(
                {
                    "datetime": d,
                    "Ticker": t,
                    "premarket_change_from_perviousday_perc": 10.0 + np.random.randn(),
                    "premarket_close": entry,
                    "Shares Float": 2e6,
                    "Market Capitalization": 50e6,
                    "marketsession_1min": entry * (1 - 0.01 * np.random.randn()),
                    "marketsession_3min": entry * (1 - 0.02 * np.random.randn()),
                    "marketsession_5min": entry * (1 - 0.03 * np.random.randn()),
                    "marketsession_10min": entry * (1 - 0.04 * np.random.randn()),
                    "marketsession_15min": entry * (1 - 0.05 * np.random.randn()),
                    "marketsession_30min": entry * (1 - 0.06 * np.random.randn()),
                    "marketsession_60min": entry * (1 - 0.07 * np.random.randn()),
                    "marketsession_120min": entry * (1 - 0.08 * np.random.randn()),
                    "marketsession_high": entry * 1.1,
                    "marketsession_close": entry * 0.9,
                }
            )

    df = pd.DataFrame(rows)
    filename = "marketsession_post_polygon_2020-01-01_2026-01-01.parquet_with_premarketvolume900K_marketcap1B.parquet"
    df.to_parquet(filename)
    print(f"Mock data created: {filename}")


# Generate the mock parquet file only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    create_mock_data()