File size: 5,142 Bytes
be7c937
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#include <gtest/gtest.h>
#include "wayy_db/table.hpp"
#include "wayy_db/ops/joins.hpp"

using namespace wayy_db;

// Shared fixture for the join tests. It builds two small tables, "trades" and
// "quotes", each marked as sorted by "timestamp" and each alternating between
// symbol id 0 (AAPL) and symbol id 1 (MSFT).
//
// NOTE(review): both helpers hand add_column pointers into function-local
// vectors; this presumes Table copies the data before the helper returns --
// confirm against Table::add_column.
class JoinsTest : public ::testing::Test {
protected:
    // Builds the five-row "trades" table with columns
    // timestamp, symbol, price and size.
    Table create_trades() {
        Table table("trades");

        // One trade every 50 time units from 100 to 300; even rows are
        // symbol 0 (AAPL), odd rows are symbol 1 (MSFT).
        std::vector<int64_t> trade_times = {100, 150, 200, 250, 300};
        std::vector<uint32_t> trade_symbols = {0, 1, 0, 1, 0};
        std::vector<double> trade_prices = {150.0, 380.0, 151.0, 381.0, 152.0};
        std::vector<int64_t> trade_sizes = {100, 200, 150, 250, 100};

        table.add_column("timestamp", DType::Timestamp, trade_times.data(), trade_times.size());
        table.add_column("symbol", DType::Symbol, trade_symbols.data(), trade_symbols.size());
        table.add_column("price", DType::Float64, trade_prices.data(), trade_prices.size());
        table.add_column("size", DType::Int64, trade_sizes.data(), trade_sizes.size());
        table.set_sorted_by("timestamp");

        return table;
    }

    // Builds the five-row "quotes" table with columns
    // timestamp, symbol, bid and ask.
    Table create_quotes() {
        Table table("quotes");

        // Quote times are 50, 90, 140, 190, 280; the symbol pattern matches
        // the trades table (0, 1, 0, 1, 0).
        std::vector<int64_t> quote_times = {50, 90, 140, 190, 280};
        std::vector<uint32_t> quote_symbols = {0, 1, 0, 1, 0};
        std::vector<double> bid_prices = {149.5, 379.5, 150.5, 380.5, 151.5};
        std::vector<double> ask_prices = {150.0, 380.0, 151.0, 381.0, 152.0};

        table.add_column("timestamp", DType::Timestamp, quote_times.data(), quote_times.size());
        table.add_column("symbol", DType::Symbol, quote_symbols.data(), quote_symbols.size());
        table.add_column("bid", DType::Float64, bid_prices.data(), bid_prices.size());
        table.add_column("ask", DType::Float64, ask_prices.data(), ask_prices.size());
        table.set_sorted_by("timestamp");

        return table;
    }
};

// Checks that ops::aj on the fixture tables yields one output row per trade,
// exposes columns from both inputs, and pairs every trade with the most
// recent preceding quote of the same symbol.
TEST_F(JoinsTest, AsOfJoinBasic) {
    auto trades = create_trades();
    auto quotes = create_quotes();

    auto result = ops::aj(trades, quotes, {"symbol"}, "timestamp");

    // Result should have the same number of rows as trades. This is a fatal
    // assertion because the per-row checks below index rows 0..4; continuing
    // with a wrong row count could read out of range.
    ASSERT_EQ(result.num_rows(), 5);

    // Check that we have columns from both tables.
    EXPECT_TRUE(result.has_column("timestamp"));
    EXPECT_TRUE(result.has_column("symbol"));
    EXPECT_TRUE(result.has_column("price"));
    EXPECT_TRUE(result.has_column("ask"));
    // Fatal: the "bid" column is read immediately below.
    ASSERT_TRUE(result.has_column("bid"));

    // Verify as-of semantics: each trade takes the bid of the latest earlier
    // quote with the same symbol (quotes of the other symbol are skipped).
    auto bids = result.column("bid").as_float64();

    // Trade at t=100, symbol=AAPL -> AAPL quote at t=50 (the t=90 quote is MSFT).
    EXPECT_DOUBLE_EQ(bids[0], 149.5);

    // Trade at t=150, symbol=MSFT -> MSFT quote at t=90.
    EXPECT_DOUBLE_EQ(bids[1], 379.5);

    // Trade at t=200, symbol=AAPL -> AAPL quote at t=140.
    EXPECT_DOUBLE_EQ(bids[2], 150.5);

    // Trade at t=250, symbol=MSFT -> MSFT quote at t=190.
    EXPECT_DOUBLE_EQ(bids[3], 380.5);

    // Trade at t=300, symbol=AAPL -> AAPL quote at t=280.
    EXPECT_DOUBLE_EQ(bids[4], 151.5);
}

// Checks that ops::aj throws InvalidOperation unless both input tables have
// been marked as sorted.
TEST_F(JoinsTest, AsOfJoinRequiresSorted) {
    Table lhs("left");
    Table rhs("right");

    // Both tables get the same single "ts" column; set_sorted_by is
    // deliberately not called on either one yet.
    std::vector<int64_t> stamps = {1, 2, 3};
    lhs.add_column("ts", DType::Timestamp, stamps.data(), stamps.size());
    rhs.add_column("ts", DType::Timestamp, stamps.data(), stamps.size());

    // Case 1: neither side is marked sorted.
    EXPECT_THROW(ops::aj(lhs, rhs, {}, "ts"), InvalidOperation);

    // Case 2: only the left side is marked sorted; the join must still throw.
    lhs.set_sorted_by("ts");
    EXPECT_THROW(ops::aj(lhs, rhs, {}, "ts"), InvalidOperation);
}

// Smoke test for ops::wj on the fixture tables: the join must produce at
// least one row and carry columns from both inputs.
TEST_F(JoinsTest, WindowJoinBasic) {
    auto left_trades = create_trades();
    auto right_quotes = create_quotes();

    // Window arguments: 60 before and 0 after each trade timestamp
    // (the original comment gives the unit as nanoseconds).
    auto joined = ops::wj(left_trades, right_quotes, {"symbol"}, "timestamp", 60, 0);

    // A window join may emit more rows than the left table holds, so only
    // a non-empty result is required here.
    EXPECT_GT(joined.num_rows(), 0);

    // One column from the quotes side and one from the trades side.
    EXPECT_TRUE(joined.has_column("bid"));
    EXPECT_TRUE(joined.has_column("price"));
}

// Checks ops::aj when no quote shares a symbol with any trade: every trade
// row is kept and the joined "bid" value is 0.0 (this test treats 0 as the
// null placeholder).
TEST_F(JoinsTest, AsOfJoinNoMatches) {
    Table trades("trades");
    Table quotes("quotes");

    // Trades for symbol 0
    std::vector<int64_t> trade_ts = {100, 200};
    std::vector<uint32_t> trade_sym = {0, 0};
    std::vector<double> trade_px = {100.0, 101.0};

    trades.add_column("timestamp", DType::Timestamp, trade_ts.data(), trade_ts.size());
    trades.add_column("symbol", DType::Symbol, trade_sym.data(), trade_sym.size());
    trades.add_column("price", DType::Float64, trade_px.data(), trade_px.size());
    trades.set_sorted_by("timestamp");

    // Quotes for symbol 1 (different symbol), so no trade can match on "symbol"
    std::vector<int64_t> quote_ts = {50, 150};
    std::vector<uint32_t> quote_sym = {1, 1};
    std::vector<double> quote_bid = {99.0, 100.0};

    quotes.add_column("timestamp", DType::Timestamp, quote_ts.data(), quote_ts.size());
    quotes.add_column("symbol", DType::Symbol, quote_sym.data(), quote_sym.size());
    quotes.add_column("bid", DType::Float64, quote_bid.data(), quote_bid.size());
    quotes.set_sorted_by("timestamp");

    auto result = ops::aj(trades, quotes, {"symbol"}, "timestamp");

    // Should still have 2 rows. Fatal assertions: rows 0 and 1 of the "bid"
    // column are indexed below, so stop if the shape is not as expected.
    ASSERT_EQ(result.num_rows(), 2);
    ASSERT_TRUE(result.has_column("bid"));

    // Unmatched rows should carry bid == 0 (null)
    auto bids = result.column("bid").as_float64();
    EXPECT_DOUBLE_EQ(bids[0], 0.0);
    EXPECT_DOUBLE_EQ(bids[1], 0.0);
}