QuantumLearner committed on
Commit
9c8b6c3
·
verified ·
1 Parent(s): b8754bb

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +361 -0
app.py ADDED
@@ -0,0 +1,361 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import numpy as np
3
+ import pandas as pd
4
+ import yfinance as yf
5
+ import plotly.graph_objects as go
6
+ import datetime
7
+
8
# Wide page layout and browser-tab title.
st.set_page_config(
    layout="wide",
    page_title="Pattern Recognition with KNN and Lorentzian Distance",
)

# --- Sidebar Inputs ---
# Widgets are created through the expander containers themselves, which puts
# them in the same place a `with expander:` block would.
st.sidebar.title("Input Parameters")

data_box = st.sidebar.expander("Data Parameters", expanded=True)
ticker = data_box.text_input(
    "Ticker",
    value="ASML.AS",
    help="Enter the ticker symbol.",
)
start_date = data_box.date_input(
    "Start Date",
    value=datetime.date(2022, 1, 1),
    help="Select start date for daily data.",
)
# The default end date is one day after today.
default_end = datetime.date.today() + datetime.timedelta(days=1)
end_date = data_box.date_input(
    "End Date",
    value=default_end,
    help="Select end date for daily data.",
)

model_box = st.sidebar.expander("Model Parameters", expanded=True)
# Every model parameter is a whole number of at least 1.
positive_int = dict(min_value=1, step=1)
neighborsCount = model_box.number_input(
    "KNN Neighbors Count",
    value=100,
    help="Higher = smoother signals, lower = more reactive.",
    **positive_int,
)
maxBarsBack = model_box.number_input(
    "Lookback Bars",
    value=500,
    help="How far back to search for similar patterns. Longer = more data, shorter = more recent context.",
    **positive_int,
)
window_length = model_box.number_input(
    "Window Length",
    value=5,
    help="Number of bars per pattern. Longer = more structure, shorter = more sensitivity.",
    **positive_int,
)
barLookahead = model_box.number_input(
    "Bar Lookahead",
    value=4,
    help="How far ahead to judge outcomes. Longer = trend focus, shorter = short-term bias.",
    **positive_int,
)

# True only on the rerun triggered by clicking the button.
run_button = st.sidebar.button("Run Analysis")
52
+
53
# --- Title and Theory ---
st.title("Pattern Recognition via Unsupervised KNN")
st.markdown("#### **Compare recent market behavior to similar historical setups.**")

st.write("This tool leverages historical price patterns to generate trading signals via self-supervised pattern recall. Instead of training a model, it compares the current market state to similar past conditions using a custom KNN approach with Lorentzian distance.")

# The methodology text is Markdown.  Inline math uses `$...$` (the form
# Streamlit renders) rather than `\(...\)`, which is an invalid escape sequence
# in a normal Python string.  The text is kept flush-left so Markdown does not
# treat indented lines as code; sub-bullets are indented just enough to nest
# under their parent list item.
with st.expander("Methodology", expanded=False):
    st.write(
        """

**Market State Representation**
Each trading day is represented by a feature vector built from 5 technical indicators:
- **RSI:** Measures momentum.
- **Wave Trend Oscillator:** Smooths price data.
- **CCI:** Quantifies deviation from a moving average.
- **ADX:** Assesses trend strength.
- **Short-Term RSI:** Captures faster momentum.

A sliding window (default: 5 bars) forms a flattened feature vector (5 indicators × 5 bars = 25 values) that captures short-term behavior.

**Lorentzian Distance**
Similarity between market states is measured using Lorentzian distance:
$$
d(a, b) = \\sum_{i=1}^{N} \\log\\Big(1 + \\left|a_i - b_i\\right|\\Big)
$$
This function reduces the impact of extreme differences, making it robust to outliers.

**KNN-Based Signal Generation**
For each new market state, the tool:
1. Compares its feature vector to past states within a user-defined lookback window.
2. Selects the $k$ nearest neighbors (default: 100) using Lorentzian distance.
3. Retrieves future price movement labels (over the next 4 bars by default):
   - $+1$ if the price rises.
   - $-1$ if the price falls.
   - $0$ if the price remains flat.
4. Sums these labels to create a directional score:
   - A positive sum indicates a long signal.
   - A negative sum indicates a short signal.
   - A zero sum retains the previous signal.

**User Adjustable Variables**
You can adjust the following parameters in the sidebar. Each one controls how the pattern recognition behaves:

- **KNN Neighbors Count:**
  Sets how many similar past patterns to compare against.
  - Higher values smooth the signal and reduce noise.
  - Lower values make it more reactive but may introduce false signals.

- **Lookback Bars:**
  Defines how far back in history to search for similar patterns.
  - A longer lookback gives more pattern variety but may include outdated behavior.
  - A shorter lookback limits comparisons to recent market regimes.

- **Window Length:**
  Determines how many consecutive bars are used to form each pattern (i.e., feature vector).
  - Longer windows capture broader structure but reduce signal frequency.
  - Shorter windows react faster but may miss context.

- **Bar Lookahead:**
  Controls how far ahead the tool checks to define “what happened” after each past setup.
  - A longer lookahead focuses on trend outcomes.
  - A shorter lookahead favors short-term price moves.
"""
    )
117
+
118
+
119
+ if run_button:
120
+ try:
121
+ with st.spinner("Running analysis..."):
122
# --- Download Data ---
# Daily bars for the requested ticker between the two sidebar dates.
df = yf.download(ticker, start=start_date, end=end_date, interval="1d")
if df.empty:
    st.error("No data returned. Please check your inputs.")
    st.stop()

# Two-level column labels are collapsed to their first level.
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

# Drop rows missing any price the indicators need, keep the original index as
# a "Date" column, then switch to a plain 0..n-1 positional index.
df = (
    df.dropna(subset=["Close", "High", "Low"])
    .assign(Date=lambda frame: frame.index)
    .reset_index(drop=True)
)
n = len(df)
135
+
136
+ # --- Indicator Functions ---
137
def rsi(series, length=14):
    """Relative Strength Index using Wilder-style smoothing (alpha = 1/length).

    Args:
        series: pandas Series of prices.
        length: smoothing length for the average gain and average loss.

    Returns:
        pandas Series aligned with ``series``.  The first element is NaN
        (no previous price to diff against).  Values lie in [0, 100]; when
        both the average gain and the average loss are zero (a perfectly
        flat stretch) the result is the neutral value 50 instead of the
        NaN that 0/0 would produce.
    """
    delta = series.diff()
    gain = delta.clip(lower=0)
    loss = -delta.clip(upper=0)
    avg_gain = gain.ewm(alpha=1 / length, adjust=False).mean()
    avg_loss = loss.ewm(alpha=1 / length, adjust=False).mean()

    # 100 - 100 / (1 + g/l) is algebraically 100 * g / (g + l); this form
    # needs a single division and makes the zero-denominator case explicit.
    total = avg_gain + avg_loss
    out = 100.0 * avg_gain / total.where(total != 0)
    # total == 0 means no movement at all: report neutral momentum.
    return out.mask(total == 0, 50.0)
145
+
146
def wave_trend(hlc3, n1=10, n2=11):
    """Wave Trend oscillator of a price series.

    Args:
        hlc3: pandas Series of prices (the caller passes (H + L + C) / 3).
        n1: EMA span for both the price average and its mean deviation.
        n2: EMA span for the final smoothing.

    Returns:
        pandas Series aligned with ``hlc3``.
    """
    esa = hlc3.ewm(span=n1, adjust=False).mean()
    deviation = hlc3 - esa
    d = deviation.abs().ewm(span=n1, adjust=False).mean()
    # d is exactly 0 on the first bar (the EMA starts at the price itself)
    # and on any flat run.  The numerator is 0 there too, so the channel
    # index is defined as 0 rather than letting 0/0 produce NaN.
    ci = (deviation / (0.015 * d.where(d != 0))).mask(d == 0, 0.0)
    return ci.ewm(span=n2, adjust=False).mean()
152
+
153
def cci(series, length=20):
    """Commodity Channel Index of a price series.

    CCI = (price - SMA) / (0.015 * mean absolute deviation), where both the
    SMA and the mean absolute deviation are taken over the same trailing
    ``length``-bar window.

    Args:
        series: pandas Series of prices.
        length: window length in bars.

    Returns:
        pandas Series aligned with ``series``; the first ``length - 1``
        values are NaN.  A window with zero deviation (flat prices) yields 0
        instead of the NaN that 0/0 would produce.
    """
    window = series.rolling(length)
    ma = window.mean()
    # Deviation must be measured against the *current* window's mean.  A
    # rolling mean of |series - ma| would mix deviations from different means
    # and would stay NaN for 2 * length - 2 bars.
    md = window.apply(lambda w: np.abs(w - w.mean()).mean(), raw=True)
    out = (series - ma) / (0.015 * md.where(md != 0))
    return out.mask(md == 0, 0.0)
157
+
158
def adx(df, length=14):
    """Average Directional Index with Wilder-style smoothing (alpha = 1/length).

    Args:
        df: DataFrame with "High", "Low" and "Close" columns.
        length: smoothing length for ATR, the directional indicators and ADX.

    Returns:
        pandas Series aligned with ``df``; the first value is NaN because
        directional movement needs a previous bar.
    """
    high = df["High"]
    low = df["Low"]
    prev_close = df["Close"].shift(1)

    up_move = high.diff()
    down_move = -low.diff()
    # A move counts as directional only when it is positive AND strictly
    # larger than the opposite move; equal moves cancel out (both become 0).
    # The trailing .where(notna) keeps the first bar NaN so the smoothing
    # starts from the first real observation.
    has_prev = up_move.notna()
    plus_dm = up_move.where((up_move > down_move) & (up_move > 0), 0.0).where(has_prev)
    minus_dm = down_move.where((down_move > up_move) & (down_move > 0), 0.0).where(has_prev)

    # True range: the largest of the bar range and the two gaps from the
    # previous close (max skips the NaN gaps on the first bar).
    true_range = pd.concat(
        [high - low, (high - prev_close).abs(), (low - prev_close).abs()],
        axis=1,
    ).max(axis=1)
    atr = true_range.ewm(alpha=1 / length, adjust=False).mean()

    plus_di = 100 * plus_dm.ewm(alpha=1 / length, adjust=False).mean() / atr
    minus_di = 100 * minus_dm.ewm(alpha=1 / length, adjust=False).mean() / atr

    # With no directional movement either way the DI sum is 0; treat that as
    # "no trend" (DX = 0) rather than 0/0 = NaN.
    di_sum = plus_di + minus_di
    dx = (100 * (plus_di - minus_di).abs() / di_sum.where(di_sum != 0)).mask(di_sum == 0, 0.0)
    return dx.ewm(alpha=1 / length, adjust=False).mean()
176
+
177
# --- Build Features ---
# Typical price feeds the Wave Trend oscillator; the others use Close
# (ADX reads High/Low/Close from the frame itself).
df["hlc3"] = (df["High"] + df["Low"] + df["Close"]) / 3.0

feature_columns = ["feat1", "feat2", "feat3", "feat4", "feat5"]
df[feature_columns[0]] = rsi(df["Close"], 14)
df[feature_columns[1]] = wave_trend(df["hlc3"], 10, 11)
df[feature_columns[2]] = cci(df["Close"], 20)
df[feature_columns[3]] = adx(df, 14)
df[feature_columns[4]] = rsi(df["Close"], 9)

features = df[feature_columns].to_numpy()

# One flattened row per trailing window of `window_length` consecutive bars.
# Window number s covers bars s .. s + window_length - 1, so it "ends" on bar
# s + window_length - 1.
window_rows = [
    features[first:first + window_length].ravel()
    for first in range(n - window_length + 1)
]
features_windowed = np.array(window_rows)
n_window = features_windowed.shape[0]
191
+
192
+ # --- Lorentzian Distance & KNN ---
193
def lorentzian_distance(a, b):
    """Lorentzian distance: sum of log(1 + |a_i - b_i|) over the last axis.

    Args:
        a: array-like feature vector (or a batch of vectors).
        b: array-like broadcastable against ``a``.

    Returns:
        A scalar for two 1-D vectors.  When either argument has extra leading
        dimensions (e.g. one query vector against a 2-D matrix of candidates)
        the result has one distance per row.
    """
    diff = np.abs(np.asarray(a) - np.asarray(b))
    # log1p is the numerically accurate form of log(1 + x) for small x.
    return np.sum(np.log1p(diff), axis=-1)
195
+
196
# --- Labels ---
# y_train[i] is the sign of Close[i + barLookahead] - Close[i]: +1 up,
# -1 down, 0 flat.  The last `barLookahead` bars have no future bar yet and
# keep the placeholder 0.
close_values = df["Close"].to_numpy()
y_train = np.zeros(n, dtype=int)
if barLookahead < n:
    future_change = close_values[barLookahead:] - close_values[:-barLookahead]
    y_train[: n - barLookahead] = np.sign(future_change).astype(int)

# --- KNN prediction ---
# prediction_arr[idx] is the sum of the labels of the nearest historical
# windows; 0 means "no vote" (not enough history or no usable neighbour).
prediction_arr = np.zeros(n_window, dtype=float)
for idx in range(n_window):
    global_idx = idx + window_length - 1  # bar on which window idx ends
    if global_idx < maxBarsBack:
        continue  # still inside the initial lookback warm-up

    start_idx = max(0, idx - maxBarsBack)
    # Only windows whose outcome is already known at bar `global_idx` may
    # vote: window j ends on bar j + window_length - 1 and its label needs the
    # close `barLookahead` bars later, so j <= idx - barLookahead.  Including
    # later windows would feed future prices into the prediction.
    stop_idx = idx - barLookahead + 1
    if stop_idx <= start_idx:
        continue

    # Lorentzian distance from the current window to every candidate at once.
    candidates = features_windowed[start_idx:stop_idx]
    dists = np.log1p(np.abs(candidates - features_windowed[idx])).sum(axis=1)

    # Windows overlapping indicator warm-up contain NaN and produce NaN
    # distances; they must not be picked as neighbours.
    usable = np.flatnonzero(np.isfinite(dists))
    if usable.size == 0:
        continue

    k = min(neighborsCount, usable.size)
    if k < usable.size:
        # argpartition's kth must be a valid index, hence k - 1.
        nearest = usable[np.argpartition(dists[usable], k - 1)[:k]]
    else:
        nearest = usable  # fewer candidates than k: all of them vote

    neighbor_labels = y_train[start_idx + nearest + window_length - 1]
    prediction_arr[idx] = neighbor_labels.sum()

# --- Signal Logic ---
# Positive score -> long (1), negative -> short (-1), zero -> keep the
# previous signal.  The first window never gets a signal of its own.
signal = np.zeros(n_window, dtype=int)
for idx in range(1, n_window):
    score = prediction_arr[idx]
    if score > 0:
        signal[idx] = 1
    elif score < 0:
        signal[idx] = -1
    else:
        signal[idx] = signal[idx - 1]

# An entry is the first window of a new long/short run.
startLong = np.zeros(n_window, dtype=bool)
startShort = np.zeros(n_window, dtype=bool)
startLong[1:] = (signal[1:] == 1) & (signal[:-1] != 1)
startShort[1:] = (signal[1:] == -1) & (signal[:-1] != -1)

n_long_signals = int(np.count_nonzero(startLong))
n_short_signals = int(np.count_nonzero(startShort))
246
+
247
# --- Build Plotly Chart ---
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=df["Date"],
    y=df["Close"],
    mode="lines",
    line=dict(color="silver", width=1.2),
    name="Close Price",
))

dates = df["Date"].tolist()
lows = df["Low"].tolist()
highs = df["High"].tolist()

# One vertical Low-High segment per window, drawn on the bar where the window
# ends and grouped by the sign of its prediction.  The `None` entries break
# the line so consecutive segments are not joined.
segments = {"pos": ([], []), "neg": ([], []), "neu": ([], [])}
for idx in range(n_window):
    global_idx = idx + window_length - 1
    score = prediction_arr[idx]
    if score > 0:
        xs, ys = segments["pos"]
    elif score < 0:
        xs, ys = segments["neg"]
    else:
        xs, ys = segments["neu"]
    xs.extend([dates[global_idx], dates[global_idx], None])
    ys.extend([lows[global_idx], highs[global_idx], None])

# (bucket, line colour, legend name) in the order the traces are added.
segment_styles = (
    ("pos", "rgba(0,204,0,0.5)", "Positive predictions"),
    ("neg", "rgba(204,0,0,0.5)", "Negative predictions"),
    ("neu", "rgba(179,179,179,0.3)", "Neutral predictions"),
)
for bucket, color, label in segment_styles:
    xs, ys = segments[bucket]
    if xs:
        fig.add_trace(go.Scatter(
            x=xs,
            y=ys,
            mode="lines",
            line=dict(color=color, width=1.0),
            name=label,
        ))

# Entry markers sit 1% below the Low (long) or 1% above the High (short) of
# the bar on which the entry window ends.
long_rows = np.flatnonzero(startLong) + window_length - 1
short_rows = np.flatnonzero(startShort) + window_length - 1

if long_rows.size:
    fig.add_trace(go.Scatter(
        x=[dates[row] for row in long_rows],
        y=[lows[row] * 0.99 for row in long_rows],
        mode="markers",
        marker=dict(symbol="triangle-up", size=10, color="lime", line=dict(color="white", width=1)),
        name="Long Entry",
    ))
if short_rows.size:
    fig.add_trace(go.Scatter(
        x=[dates[row] for row in short_rows],
        y=[highs[row] * 1.01 for row in short_rows],
        mode="markers",
        marker=dict(symbol="triangle-down", size=10, color="red", line=dict(color="white", width=1)),
        name="Short Entry",
    ))

# Axis title colours are set through `title.font`; the older `titlefont`
# shortcut is deprecated and rejected by recent Plotly releases.
white_font = dict(color="white")
fig.update_layout(
    template="plotly_dark",
    title=dict(text=f"{ticker} — KNN Signals via Lorentzian Distance ({start_date} to {end_date})", font=white_font),
    xaxis=dict(title=dict(text="Date", font=white_font), tickformat="%Y-%m-%d", tickfont=white_font),
    yaxis=dict(title=dict(text="Price", font=white_font), tickfont=white_font),
    legend=dict(font=white_font),
)
fig.update_xaxes(showgrid=True, gridcolor="grey")
fig.update_yaxes(showgrid=True, gridcolor="grey")
337
+
338
# --- Output Only the Chart ---
# Heading, a caption naming the ticker, the entry counts, then the figure.
st.markdown("### Price and Signal Annotations")

chart_caption = f"""
The chart below shows **{ticker}** close price along with signals derived from historical pattern similarity. **Gray bars** mark the initial lookback window with no predictions.

"""
st.markdown(chart_caption)

signal_summary = f"Long signals: {n_long_signals}, Short signals: {n_short_signals}."
st.write(signal_summary)

st.plotly_chart(fig, use_container_width=True)
348
+
349
+ except Exception:
350
+ st.error("An error occurred during the analysis.")
351
+
352
# Hide default Streamlit style
# CSS that makes the element with id "MainMenu" and the <footer> invisible;
# injected as raw HTML, which requires unsafe_allow_html.
hide_default_css = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
st.markdown(hide_default_css, unsafe_allow_html=True)