cloudyu committed on
Commit
eaaed28
·
verified ·
1 Parent(s): f1683c3

Create signal_generator.py

Browse files
Files changed (1) hide show
  1. signal_generator.py +327 -0
signal_generator.py ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yfinance as yf
2
+ import talib
3
+ import pandas as pd
4
+ import numpy as np
5
+ import xgboost as xgb
6
+ import argparse
7
+ import sys
8
+ import requests
9
+ from datetime import datetime, timedelta
10
+ import warnings
11
+ warnings.filterwarnings('ignore')
12
+
13
def parse_arguments():
    """Build the command-line parser and return the parsed arguments.

    Returns:
        argparse.Namespace with:
            tickers: list of one or more ticker symbols (positional).
            period: historical data period string; '2y' when omitted.
            target: target ticker, or None when ``--target`` is not given.
    """
    cli = argparse.ArgumentParser(description='Stock trading signal generator')

    # (flags, keyword options) for every argument, registered in order.
    argument_specs = (
        (('tickers',), {'nargs': '+', 'help': 'List of stock ticker symbols to predict'}),
        (('--period',), {'default': '2y', 'help': 'Historical data period (default: 2y)'}),
        (('--target',), {'help': 'Target stock ticker (default: first ticker)'}),
    )
    for flags, options in argument_specs:
        cli.add_argument(*flags, **options)

    return cli.parse_args()
20
+
21
def get_news_sentiment_score(headlines, target_ticker):
    """Score headlines for a ticker with a small keyword lexicon.

    A headline is "relevant" when it contains the lower-cased ticker or one
    of the known company aliases. Each relevant headline is scored as
    ``(positives - negatives) / (positives + negatives)`` (0.0 when it
    contains no lexicon word), which lies in [-1, 1].

    Args:
        headlines: sequence of headline/summary strings. Entries that are
            not strings are counted in the total but treated as irrelevant.
        target_ticker: ticker symbol; alias lookup is case-insensitive.

    Returns:
        Tuple ``(avg_sentiment, relevance_score)``: the mean score over
        relevant headlines and the fraction of headlines that were relevant.
        ``(0.0, 0.0)`` when ``headlines`` is empty.
    """
    if not headlines:
        return 0.0, 0.0

    positive_words = ('beat', 'exceed', 'strong', 'growth', 'upgrade', 'bullish', 'positive',
                      'record', 'surge', 'rally', 'gain', 'profit', 'success', 'innovation')
    negative_words = ('miss', 'decline', 'drop', 'fall', 'downgrade', 'bearish', 'negative',
                      'loss', 'scandal', 'lawsuit', 'layoff', 'bankruptcy', 'crisis', 'warning')

    company_aliases = {
        'AAPL': ('apple', 'iphone', 'mac', 'ios'),
        'MSFT': ('microsoft', 'windows', 'azure', 'office'),
        'GOOGL': ('google', 'alphabet', 'search', 'android', 'youtube'),
        'AMZN': ('amazon', 'aws', 'prime', 'ecommerce'),
        'QQQ': ('nasdaq', 'tech', 'technology', 'index'),
    }
    # Look aliases up by upper-cased ticker so 'aapl' and 'AAPL' behave alike;
    # previously a lower-case ticker silently lost all company aliases.
    company_keywords = [target_ticker.lower(), *company_aliases.get(target_ticker.upper(), ())]

    total_sentiment = 0.0
    relevant_articles = 0

    for headline in headlines:
        # A missing summary (None) must not crash the whole scoring pass.
        if not isinstance(headline, str):
            continue
        text = headline.lower()

        # NOTE: matching is by substring, so short keywords such as 'mac'
        # also match inside longer words.
        if not any(keyword in text for keyword in company_keywords):
            continue

        relevant_articles += 1
        pos_count = sum(1 for word in positive_words if word in text)
        neg_count = sum(1 for word in negative_words if word in text)
        hits = pos_count + neg_count
        if hits:
            total_sentiment += (pos_count - neg_count) / hits

    relevance_score = relevant_articles / len(headlines)
    avg_sentiment = total_sentiment / relevant_articles if relevant_articles else 0.0

    return avg_sentiment, relevance_score
61
+
62
def fetch_breaking_news(target_ticker):
    """Return up to five recent news summaries for ``target_ticker``.

    Reads ``yf.Ticker(target_ticker).news`` and collects each item's
    ``['content']['summary']`` string, echoing every collected summary to
    stdout. Fetching is best-effort: on any failure a warning is written to
    stderr and an empty list is returned.

    Args:
        target_ticker: symbol passed to ``yf.Ticker``.

    Returns:
        List of summary strings (possibly empty, at most five).
    """
    max_items = 5
    headlines = []

    try:
        news = yf.Ticker(target_ticker).news
    except Exception as exc:
        # Deliberately broad: news is optional and must never abort the run,
        # but the failure is reported instead of being swallowed silently.
        print(f"Warning: could not fetch news for {target_ticker}: {exc}", file=sys.stderr)
        return headlines

    # Slice instead of indexing range(5): fewer than five items is normal and
    # should not depend on an exception to end the loop.
    for item in list(news or [])[:max_items]:
        content = item.get('content') if isinstance(item, dict) else None
        summary = content.get('summary') if isinstance(content, dict) else None
        # Skip malformed items individually rather than dropping the rest.
        if isinstance(summary, str):
            print(summary)
            headlines.append(summary)

    return headlines
74
+
75
def calculate_sample_weights(df, target_col):
    """Derive per-row training weights from absolute price moves.

    Rows whose absolute day-over-day change in ``target_col`` sits above the
    average get a weight above 1; every weight lies in [1, 4].

    Args:
        df: DataFrame containing ``target_col``.
        target_col: name of the price column.

    Returns:
        NumPy array of weights, one per row of ``df``.
    """
    abs_moves = df[target_col].diff().abs()

    # The fill value is the mean of the raw moves (zeros still included),
    # taken before zero moves are masked out.
    raw_mean = abs_moves.mean()
    abs_moves = abs_moves.replace(0, np.nan).fillna(raw_mean)

    upper_q, lower_q = np.percentile(abs_moves.dropna(), [75, 25])
    spread = upper_q - lower_q
    if spread == 0:
        # Degenerate IQR: fall back to the standard deviation as the scale.
        spread = abs_moves.std()

    centered = abs_moves - abs_moves.mean()
    scaled = centered / (spread + 1e-8)

    # Only above-average moves add weight, capped at +3.
    boost = np.clip(scaled, 0, 3)
    return (1 + boost).fillna(1.0).values
89
+
90
def detect_price_manipulation(df, close_col, volume_col=None):
    """Run six heuristic checks on recent prices and combine them into a score.

    Args:
        df: DataFrame holding the price (and optionally volume) series.
        close_col: name of the closing-price column.
        volume_col: name of the volume column; when None or absent from
            ``df`` the price/volume divergence check is reported as False.

    Returns:
        Tuple ``(score, flags)`` where ``flags`` maps each check name to its
        boolean outcome and ``score`` is the sum of the weights of the
        triggered checks, capped at 1.0.
    """
    closes = df[close_col]
    flags = {}

    # 1) Last-5-day volatility versus the 20-day volatility six rows back.
    daily_returns = closes.pct_change()
    recent_sigma = daily_returns.tail(5).std()
    baseline_sigma = daily_returns.rolling(20).std().iloc[-6]
    if baseline_sigma > 0:
        sigma_ratio = recent_sigma / baseline_sigma
    else:
        sigma_ratio = 1.0
    flags['abnormal_volatility'] = sigma_ratio > 2.0

    # 2) Length of the current run of positive returns (within the last 10).
    streak = 0
    for day_return in reversed(daily_returns.tail(10).tolist()):
        if not day_return > 0:
            break
        streak += 1
    flags['consecutive_green_days'] = streak >= 5

    # 3) Price up more than 5% while volume is down more than 10% over 5 rows.
    if volume_col is None or volume_col not in df.columns:
        flags['price_volume_divergence'] = False
    else:
        last_prices = closes.tail(5)
        last_volumes = df[volume_col].tail(5)
        price_trend = (last_prices.iloc[-1] - last_prices.iloc[0]) / last_prices.iloc[0]
        volume_trend = (last_volumes.iloc[-1] - last_volumes.iloc[0]) / last_volumes.iloc[0]
        flags['price_volume_divergence'] = price_trend > 0.05 and volume_trend < -0.1

    # 4) Three or more close-to-close moves above 3% in the last 10 rows.
    previous_close = closes.shift(1)
    gap_pct = (closes - previous_close) / previous_close
    big_gap_count = (gap_pct.tail(10).abs() > 0.03).sum()
    flags['excessive_gaps'] = big_gap_count >= 3

    # 5) Latest close more than 15% away from its 20-row moving average.
    last_close = closes.iloc[-1]
    last_sma = closes.rolling(20).mean().iloc[-1]
    flags['extreme_ma_deviation'] = abs(last_close - last_sma) / last_sma > 0.15

    # 6) RSI(14) above 70 on each of the last 5 rows.
    rsi_tail = talib.RSI(closes, 14).tail(5)
    flags['persistent_overbought'] = (rsi_tail > 70).all()

    # Accumulate in a fixed order so the floating-point total is reproducible.
    penalties = (
        ('abnormal_volatility', 0.2),
        ('consecutive_green_days', 0.15),
        ('price_volume_divergence', 0.2),
        ('excessive_gaps', 0.15),
        ('extreme_ma_deviation', 0.15),
        ('persistent_overbought', 0.15),
    )
    score = 0.0
    for check_name, penalty in penalties:
        if flags[check_name]:
            score += penalty

    return min(score, 1.0), flags
142
+
143
def main():
    """Run the full pipeline and print a trading signal for the target ticker.

    Steps: parse CLI arguments, download daily data with yfinance, score
    recent news, build technical and cross-market features, fit an XGBoost
    classifier on next-day direction, apply the news and manipulation
    overrides, then print the signal with an expected price range.

    Raises:
        ValueError: fewer than 50 usable training rows remain after feature
            construction.

    Exits with status 1 when the download returns no data.
    """
    args = parse_arguments()
    target_ticker = args.target if args.target else args.tickers[0]

    if target_ticker not in args.tickers:
        args.tickers.append(target_ticker)

    # Map user-facing names to the symbols Yahoo Finance expects.
    yahoo_aliases = {'VIX': '^VIX', 'TNX': '^TNX', 'DXY': 'DX-Y.NYB'}
    tickers = {ticker: yahoo_aliases.get(ticker.upper(), ticker) for ticker in args.tickers}

    # Downloaded columns are keyed by Yahoo symbol, not by the user-facing
    # name, so the target must be looked up through the same mapping
    # (otherwise a VIX/TNX/DXY target raises KeyError).
    target_symbol = tickers[target_ticker]

    # Download daily data
    raw_data = yf.download(list(tickers.values()), period=args.period, progress=False)
    if raw_data.empty:
        print("Error: Failed to download data")
        sys.exit(1)

    # Fetch news
    news_headlines = fetch_breaking_news(target_symbol)
    news_sentiment, news_relevance = get_news_sentiment_score(news_headlines, target_ticker)

    # Prepare training data: the most recent row is held out of training.
    training_data = raw_data.iloc[:-1]
    latest_target_price = raw_data['Close'][target_symbol].iloc[-1]
    latest_date = raw_data.index[-1]

    price_fields = ('Open', 'High', 'Low', 'Close', 'Volume')

    df = pd.DataFrame(index=training_data.index)
    for field in price_fields:
        df[f'{target_ticker}_{field}'] = training_data[field][target_symbol]

    for ticker, yf_symbol in tickers.items():
        if ticker != target_ticker:
            df[f'{ticker}_Close'] = training_data['Close'][yf_symbol]

    df = df.ffill().dropna()

    # Technical indicators
    close_col = f'{target_ticker}_Close'
    high_col = f'{target_ticker}_High'
    low_col = f'{target_ticker}_Low'

    df['RSI'] = talib.RSI(df[close_col], 14)
    df['MACD'], df['MACD_signal'], _ = talib.MACD(df[close_col])
    df['SMA_20'] = talib.SMA(df[close_col], 20)
    df['SMA_50'] = talib.SMA(df[close_col], 50)
    df['ATR'] = talib.ATR(df[high_col], df[low_col], df[close_col], 14)
    df['Vol_10'] = df[close_col].pct_change().rolling(10).std()

    # Cross-market features
    for ticker in tickers:
        if ticker == target_ticker:
            continue
        other_close = df[f'{ticker}_Close']
        kind = ticker.upper()
        if kind == 'VIX':
            df['VIX_Rank'] = other_close.rolling(126).rank(pct=True) * 100
            df['VIX_Slope'] = other_close.diff(5)
            df['VIX_Sustained_High'] = ((other_close > 20) &
                                        (other_close > other_close.rolling(10).mean())).astype(int)
        elif kind == 'TNX':
            df['TNX_SMA_20'] = talib.SMA(other_close, 20)
            df['TNX_Rising'] = (other_close > df['TNX_SMA_20']).astype(int)
            df['TNX_Accel'] = other_close.diff(5)
        elif kind == 'DXY':
            df['DXY_SMA_50'] = talib.SMA(other_close, 50)
            df['USD_Strength'] = (other_close > df['DXY_SMA_50']).astype(int)
            df['DXY_Slope'] = other_close.diff(5)
        else:
            ratio_col = f'{target_ticker}_{ticker}_Ratio'
            df[ratio_col] = df[close_col] / other_close
            df[f'{ratio_col}_SMA'] = talib.SMA(df[ratio_col].values, 20)
            df[f'{ticker}_Trend_Up'] = (other_close > other_close.rolling(50).mean()).astype(int)

    # Create target: 1 when the next row's close-to-close return is positive.
    df['Next_Return'] = df[close_col].pct_change().shift(-1)
    df['Target'] = (df['Next_Return'] > 0).astype(int)
    df_for_model = df.dropna().copy()

    # Exclude the target's own columns (raw prices and ratio features) by
    # prefix. A substring test would also drop other tickers whose name ends
    # with the target's (e.g. target 'QQQ' vs. 'TQQQ_Close').
    target_prefix = f'{target_ticker}_'
    feature_cols = [col for col in df.columns
                    if not col.startswith(target_prefix) and col not in ['Next_Return', 'Target']]

    if len(df_for_model) < 50:
        raise ValueError(f"Insufficient training {len(df_for_model)} rows")

    # Train model
    sample_weights = calculate_sample_weights(df_for_model, close_col)
    model_params = {
        'n_estimators': 5, 'max_depth': 3, 'learning_rate': 0.01, 'subsample': 0.8,
        'colsample_bytree': 0.8, 'random_state': 42, 'eval_metric': 'logloss', 'use_label_encoder': False
    }

    final_model = xgb.XGBClassifier(**model_params)
    final_model.fit(df_for_model[feature_cols], df_for_model['Target'], sample_weight=sample_weights)

    # Prepare prediction features: raw values come from the second-to-last
    # downloaded row, derived indicators from the last row of df.
    prediction_features_df = pd.DataFrame(index=[raw_data.index[-2]])
    for field in price_fields:
        prediction_features_df[f'{target_ticker}_{field}'] = raw_data[field][target_symbol].iloc[-2]

    for ticker, yf_symbol in tickers.items():
        if ticker != target_ticker:
            prediction_features_df[f'{ticker}_Close'] = raw_data['Close'][yf_symbol].iloc[-2]

    for indicator in ('RSI', 'MACD', 'MACD_signal', 'SMA_20', 'SMA_50', 'ATR', 'Vol_10'):
        prediction_features_df[indicator] = df[indicator].iloc[-1]

    # Iterate items() so yf_symbol always belongs to the current ticker; the
    # ratio previously reused the symbol left over from the loop above.
    for ticker, yf_symbol in tickers.items():
        if ticker == target_ticker:
            continue
        kind = ticker.upper()
        if kind == 'VIX':
            carried_cols = ['VIX_Rank', 'VIX_Slope', 'VIX_Sustained_High']
        elif kind == 'TNX':
            carried_cols = ['TNX_SMA_20', 'TNX_Rising', 'TNX_Accel']
        elif kind == 'DXY':
            carried_cols = ['DXY_SMA_50', 'USD_Strength', 'DXY_Slope']
        else:
            ratio_col = f'{target_ticker}_{ticker}_Ratio'
            ratio_val = raw_data['Close'][target_symbol].iloc[-2] / raw_data['Close'][yf_symbol].iloc[-2]
            prediction_features_df[ratio_col] = ratio_val
            carried_cols = [f'{ratio_col}_SMA', f'{ticker}_Trend_Up']
        for col in carried_cols:
            prediction_features_df[col] = df[col].iloc[-1]

    pred_features = prediction_features_df[feature_cols].iloc[0:1]
    base_signal = int(final_model.predict(pred_features)[0])

    # Manipulation detection
    target_stock_series = pd.DataFrame(index=raw_data.index)
    target_stock_series['Close'] = raw_data['Close'][target_symbol]
    target_stock_series['Volume'] = raw_data['Volume'][target_symbol]
    manipulation_score, _ = detect_price_manipulation(target_stock_series, 'Close', 'Volume')

    # News override: only strong, sufficiently relevant sentiment flips the signal.
    final_signal = base_signal
    if news_relevance > 0.3 and abs(news_sentiment) > 0.5:
        if news_sentiment < -0.7:
            final_signal = 0
        elif news_sentiment > 0.7:
            final_signal = 1

    # Manipulation override: applied after news, and keyed on the model's
    # own signal, so it can cancel a BUY.
    if manipulation_score >= 0.5 and base_signal == 1:
        final_signal = 0

    # Calculate price range from 10-day return volatility (2% fallback).
    vol_10 = df['Vol_10'].iloc[-1]
    expected_move = latest_target_price * vol_10 if pd.notna(vol_10) else latest_target_price * 0.02

    if news_relevance > 0.3:
        news_multiplier = 1.0 + abs(news_sentiment) * news_relevance
        expected_move *= news_multiplier

    upper_target = latest_target_price + expected_move
    lower_target = latest_target_price - expected_move

    # Simplified output: the price range is always shown.
    print(f"{target_ticker} | {latest_date.strftime('%Y-%m-%d')} | ${latest_target_price:.2f}")

    if manipulation_score >= 0.7:
        print(f"SIGNAL: AVOID | Range: ${lower_target:.2f} - ${upper_target:.2f} (High manipulation risk)")
    elif final_signal == 1:
        print(f"SIGNAL: BUY | Range: ${lower_target:.2f} - ${upper_target:.2f} | Target: ${upper_target:.2f}")
    else:
        print(f"SIGNAL: HOLD CASH | Range: ${lower_target:.2f} - ${upper_target:.2f}")
324
+
325
# Script entry point: run the pipeline, then print the disclaimer.
# NOTE(review): indentation was lost in this copy of the file; the disclaimer
# print is assumed to belong inside the guard — confirm against the original.
if __name__ == "__main__":
    main()
    print("Disclaimer: This is for informational purposes only and does not constitute investment advice.")