architojha committed on
Commit
34dca54
·
1 Parent(s): 28a79e8

updated req file

Browse files
Files changed (2) hide show
  1. api.py +81 -71
  2. requirements.txt +1 -0
api.py CHANGED
@@ -6,7 +6,7 @@ import tensorflow as tf
6
  from yahoo_fin.stock_info import get_data
7
  from sklearn.preprocessing import MinMaxScaler
8
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
9
- from pytorch_forecasting import TemporalFusionTransformer
10
  from bs4 import BeautifulSoup
11
  import requests
12
  import torch
@@ -32,9 +32,10 @@ query_engine = index.as_query_engine(llm=llm)
32
  MODEL_PATH = "lib/20_lstm_model.h5"
33
  model = tf.keras.models.load_model(MODEL_PATH)
34
 
35
- model_name_news= "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
36
- tokenizer = AutoTokenizer.from_pretrained(model_name_news)
37
- sentiment_model = AutoModelForSequenceClassification.from_pretrained(model_name_news)
 
38
 
39
  best_model_path = 'lib/tft_pred.ckpt'
40
 
@@ -42,25 +43,30 @@ best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
42
 
43
  app = FastAPI()
44
 
 
45
  class TickerRequest(BaseModel):
46
  ticker: str
47
  start_date: str
48
  end_date: str
49
  interval: str = "1d"
50
 
 
51
  def fetch_and_process_ticker_data(ticker, start_date, end_date, interval="1d"):
52
  df = pd.DataFrame()
53
  try:
54
- temp = get_data(ticker, start_date=start_date, end_date=end_date, index_as_date=True, interval=interval)
 
55
  temp = temp.drop(columns="close")
56
  temp["revenue"] = temp["adjclose"] * temp["volume"]
57
  temp["daily_profit"] = temp["adjclose"] - temp["open"]
58
  df = pd.concat([df, temp], axis=0)
59
  df.to_csv("api_test.csv", index=False) # Save locally for reference
60
  except Exception as error:
61
- raise HTTPException(status_code=500, detail=f"Error processing ticker {ticker}: {error}")
 
62
  return df
63
 
 
64
  def ticker_encoded(df):
65
  label_map = {'ATOM': 0, 'HBIO': 1, 'IBEX': 2, 'MYFW': 3, 'NATH': 4}
66
 
@@ -77,17 +83,21 @@ def ticker_encoded(df):
77
 
78
  return df
79
 
 
80
  def normalize(df):
81
  price_scaler = MinMaxScaler()
82
  volume_revenue_scaler = MinMaxScaler()
83
  profit_scaler = MinMaxScaler()
84
 
85
- df[["open", "high", "low", "adjclose"]] = price_scaler.fit_transform(df[["open", "high", "low", "adjclose"]])
86
- df[["volume", "revenue"]] = volume_revenue_scaler.fit_transform(df[["volume", "revenue"]])
 
 
87
  df[["daily_profit"]] = profit_scaler.fit_transform(df[["daily_profit"]])
88
 
89
  return df, price_scaler
90
 
 
91
  def create_sequence(dataset):
92
  sequences = []
93
  labels = []
@@ -112,17 +122,21 @@ def create_sequence(dataset):
112
 
113
  return np.array(sequences), np.array(labels), dates, stock
114
 
115
- def scaling_predictions(price_scaler,combined_dataset_prediction):
 
116
 
117
  price_scaler.min_ = np.array([price_scaler.min_[0], price_scaler.min_[3]])
118
 
119
- price_scaler.scale_ = np.array([price_scaler.scale_[0], price_scaler.scale_[3]])
 
120
 
121
- combined_dataset_prediction_inverse =price_scaler.inverse_transform(combined_dataset_prediction)
 
122
 
123
  return combined_dataset_prediction_inverse
124
 
125
- def storing_predictions(df,dates,stock,combined_dataset_prediction_inverse):
 
126
 
127
  df['pred_open'] = np.nan
128
 
@@ -136,7 +150,6 @@ def storing_predictions(df,dates,stock,combined_dataset_prediction_inverse):
136
 
137
  for i in range(len(dates)):
138
 
139
-
140
  if current_row_date == dates[i] and stock[i] == current_row_ticker:
141
 
142
  opening_price = combined_dataset_prediction_inverse[i][0]
@@ -149,48 +162,51 @@ def storing_predictions(df,dates,stock,combined_dataset_prediction_inverse):
149
 
150
  return df
151
 
 
152
  def scrape_news(ticker_name):
153
 
154
- columns = ['datatime', 'title','source', 'link','top_sentiment','sentiment_score']
 
155
  df = pd.DataFrame(columns=columns)
156
 
157
- for i in range (1,3):
158
 
159
  url = f'https://markets.businessinsider.com/news/{ticker_name}-stock?p={i}'
160
  response = requests.get(url)
161
  html = response.text
162
  soup = BeautifulSoup(html, 'lxml')
163
 
164
- articles = soup.find_all('div',class_= 'latest-news__story')
165
 
166
  for article in articles:
167
- datatime = article.find('time', class_ = 'latest-news__date').get('datetime')
 
168
 
169
- title = article.find('a', class_ = 'news-link').text
170
 
171
- source = article.find('span', class_ = 'latest-news__source').text
172
 
173
- link = article.find('a', class_ = 'news-link').get('href')
174
 
175
  top_sentiment = ''
176
 
177
  sentiment_score = 0
178
 
179
- temp = pd.DataFrame([[datatime,title, source,link, top_sentiment,sentiment_score]], columns= df.columns)
 
180
 
181
- df = pd.concat([temp,df], axis = 0)
182
 
183
  return df
184
 
185
- def add_recent_news(main_df, news_df,lookback_days=10):
186
-
 
187
  news_df.drop(columns=['top_sentiment', 'sentiment_score'], inplace=True)
188
 
189
-
190
  main_df['date'] = pd.to_datetime(main_df['date'])
191
  news_df['datatime'] = pd.to_datetime(news_df['datatime'])
192
 
193
-
194
  news_list = []
195
  last_available_news = ''
196
 
@@ -199,74 +215,67 @@ def add_recent_news(main_df, news_df,lookback_days=10):
199
  current_ticker = row['ticker']
200
  news_articles = ''
201
 
202
-
203
  for _, news_row in news_df.iterrows():
204
  extracted_date = news_row['datatime']
205
 
206
-
207
  if (current_date - extracted_date).days <= lookback_days and extracted_date < current_date:
208
- news_articles += news_row['title'] + " "
209
 
210
-
211
  if not news_articles.strip():
212
  for _, news_row in news_df[::-1].iterrows():
213
  if news_row['datatime'] < current_date:
214
  news_articles = news_row['title']
215
  break
216
 
217
-
218
  last_available_news = news_articles.strip() or last_available_news
219
  news_list.append(last_available_news)
220
 
221
-
222
  main_df['news'] = news_list
223
 
224
-
225
  return main_df
226
 
 
227
  def news_sentiment(df):
228
 
229
  news_column_name = 'news'
230
  texts = df[news_column_name].tolist()
231
 
232
-
233
- inputs = tokenizer(texts, padding=True, truncation=True, return_tensors="pt")
234
 
235
  with torch.no_grad():
236
  outputs = sentiment_model(**inputs)
237
 
238
-
239
  logits = outputs.logits
240
  probs = torch.softmax(logits, dim=-1)
241
 
242
-
243
  labels = ["negative", "neutral", "positive"]
244
 
245
-
246
  predictions = torch.argmax(probs, dim=-1)
247
 
248
-
249
- df['predicted_sentiment'] = pd.Series([labels[pred] for pred in predictions], index=df[df[news_column_name].notna()].index)
250
 
251
  sentiment_map = {
252
- 'positive': 1,
253
- 'neutral': 0,
254
- 'negative': -1
255
  }
256
 
257
-
258
  df['sentiment_score'] = df['predicted_sentiment'].map(sentiment_map)
259
 
260
  df = df.drop(columns=['news'])
261
 
262
  return df
263
 
 
264
  def get_tft_predictions(df):
265
  for i in range(1, 21):
266
  df[f'open_lag_{i}'] = df.groupby('ticker')['open'].shift(i)
267
  df[f'adjclose_lag_{i}'] = df.groupby('ticker')['adjclose'].shift(i)
268
 
269
- lag_columns = [f'open_lag_{i}' for i in range(1, 21)] + [f'adjclose_lag_{i}' for i in range(1, 21)]
 
270
 
271
  df.dropna(subset=lag_columns, inplace=True)
272
 
@@ -274,18 +283,20 @@ def get_tft_predictions(df):
274
 
275
  return predictions
276
 
 
277
  @app.post("/fetch-ticker-data/")
278
  async def fetch_ticker_data(request: TickerRequest):
279
- try:
280
- result_df = fetch_and_process_ticker_data(
281
- ticker=request.ticker,
282
- start_date=request.start_date,
283
- end_date=request.end_date,
284
- interval=request.interval
285
- )
286
- return result_df.to_dict(orient="records")
287
- except Exception as e:
288
- raise HTTPException(status_code=500, detail=str(e))
 
289
 
290
  @app.post("/predict-prices/")
291
  async def predict_prices(request: TickerRequest):
@@ -297,46 +308,45 @@ async def predict_prices(request: TickerRequest):
297
  interval=request.interval
298
  )
299
 
300
-
301
  raw_data = raw_data.tail(60)
302
- raw_data= raw_data.reset_index()
303
-
304
 
305
  raw_data.rename(columns={"index": "date"}, inplace=True)
306
  raw_data = ticker_encoded(raw_data)
307
 
308
- temp_df = raw_data.copy()
309
 
310
  normalized_data, scaler = normalize(raw_data)
311
  normalized_data = normalized_data.drop(columns=['ticker'])
312
 
313
  sequences, _, dates, stock = create_sequence(normalized_data)
314
  combined_dataset_prediction = model.predict(sequences)
315
- combined_dataset_prediction_inverse = scaling_predictions(scaler,combined_dataset_prediction)
 
316
 
 
 
 
317
 
318
- lstm_pred_df=storing_predictions(temp_df,dates,stock,combined_dataset_prediction_inverse)
319
- news_df = scrape_news(ticker_name = request.ticker)
320
-
321
- combined_with_news_df = add_recent_news(lstm_pred_df,news_df)
322
  sentiment_df = news_sentiment(combined_with_news_df)
323
-
324
  sentiment_df['time_idx'] = range(1, len(sentiment_df) + 1)
325
 
326
  predicted_values = get_tft_predictions(sentiment_df)
327
 
328
- final_pred_open_price = predicted_values[0].item()
329
- final_pred_closing_price = predicted_values[1].item()
330
 
331
- return {"open": final_pred_open_price, 'close': final_pred_closing_price}
332
 
333
  except Exception as e:
334
  raise HTTPException(status_code=500, detail=str(e))
335
-
336
 
337
  @app.get("/query-rag/{user_query}")
338
- def query_rag(user_query:str):
339
 
340
  response = query_engine.query(user_query)
341
 
342
- return {'message':response}
 
6
  from yahoo_fin.stock_info import get_data
7
  from sklearn.preprocessing import MinMaxScaler
8
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
9
+ from pytorch_forecasting import TemporalFusionTransformer
10
  from bs4 import BeautifulSoup
11
  import requests
12
  import torch
 
32
  MODEL_PATH = "lib/20_lstm_model.h5"
33
  model = tf.keras.models.load_model(MODEL_PATH)
34
 
35
+ model_name_news = "mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis"
36
+ tokenizer = AutoTokenizer.from_pretrained(model_name_news)
37
+ sentiment_model = AutoModelForSequenceClassification.from_pretrained(
38
+ model_name_news)
39
 
40
  best_model_path = 'lib/tft_pred.ckpt'
41
 
 
43
 
44
  app = FastAPI()
45
 
46
+
47
class TickerRequest(BaseModel):
    """Request body accepted by the ticker endpoints of this service.

    All fields are plain strings; no format validation happens here. The
    values are handed through to ``fetch_and_process_ticker_data``.
    """

    # Stock symbol to download data for.
    ticker: str
    # Start of the requested range (forwarded as ``start_date``).
    # NOTE(review): expected date format is not visible here - confirm
    # against the data provider.
    start_date: str
    # End of the requested range (forwarded as ``end_date``).
    end_date: str
    # Sampling interval forwarded to the downloader; defaults to "1d".
    interval: str = "1d"
52
 
53
+
54
def fetch_and_process_ticker_data(ticker, start_date, end_date, interval="1d"):
    """Download price history for ``ticker`` and add derived columns.

    The ``close`` column is dropped and two columns are added:
    ``revenue`` (``adjclose * volume``) and ``daily_profit``
    (``adjclose - open``). The result is also written to ``api_test.csv``
    in the working directory.

    Args:
        ticker: Symbol passed to ``get_data``.
        start_date: Start of the range, forwarded to ``get_data``.
        end_date: End of the range, forwarded to ``get_data``.
        interval: Sampling interval forwarded to ``get_data``.

    Returns:
        The processed DataFrame, indexed by date (``index_as_date=True``).

    Raises:
        HTTPException: Status 500 when the download or the processing
            fails; the original exception is chained as ``__cause__``.
    """
    try:
        prices = get_data(
            ticker,
            start_date=start_date,
            end_date=end_date,
            index_as_date=True,
            interval=interval,
        )
        # ``drop`` returns a new frame, so the column assignments below do
        # not touch the object returned by ``get_data``.
        prices = prices.drop(columns="close")
        prices["revenue"] = prices["adjclose"] * prices["volume"]
        prices["daily_profit"] = prices["adjclose"] - prices["open"]
        # Save locally for reference. ``index=False`` means the date index
        # is not part of the CSV.
        prices.to_csv("api_test.csv", index=False)
    except Exception as error:
        # Chain the cause so the real traceback is not lost in server logs.
        raise HTTPException(
            status_code=500,
            detail=f"Error processing ticker {ticker}: {error}",
        ) from error
    return prices
68
 
69
+
70
  def ticker_encoded(df):
71
  label_map = {'ATOM': 0, 'HBIO': 1, 'IBEX': 2, 'MYFW': 3, 'NATH': 4}
72
 
 
83
 
84
  return df
85
 
86
+
87
def normalize(df):
    """Min-max scale the numeric feature columns of ``df`` in place.

    Three independent scalers are fitted: one over the four price columns,
    one over ``volume``/``revenue`` and one over ``daily_profit``. Only the
    price scaler is handed back, together with the (mutated) frame.

    Args:
        df: Frame containing the columns ``open``, ``high``, ``low``,
            ``adjclose``, ``volume``, ``revenue`` and ``daily_profit``.

    Returns:
        A ``(df, price_scaler)`` tuple; ``df`` is the same object that was
        passed in.
    """
    price_columns = ["open", "high", "low", "adjclose"]
    price_scaler = MinMaxScaler()
    df[price_columns] = price_scaler.fit_transform(df[price_columns])

    # The remaining groups each get their own throw-away scaler.
    for columns in (["volume", "revenue"], ["daily_profit"]):
        df[columns] = MinMaxScaler().fit_transform(df[columns])

    return df, price_scaler
99
 
100
+
101
  def create_sequence(dataset):
102
  sequences = []
103
  labels = []
 
122
 
123
  return np.array(sequences), np.array(labels), dates, stock
124
 
125
+
126
def scaling_predictions(price_scaler, combined_dataset_prediction):
    """Map scaled two-column predictions back to price units.

    ``price_scaler`` is expected to expose ``min_`` and ``scale_`` arrays
    in which position 0 and position 3 belong to the two predicted
    columns (``open`` and ``adjclose`` when the scaler comes from
    ``normalize``). Min-max scaling is ``x * scale_ + min_``, so the
    inverse applied here is ``(x - min_) / scale_``.

    Unlike the previous implementation the scaler is NOT modified:
    overwriting ``min_``/``scale_`` on the caller's object left it unusable
    for its original four columns and made a second call fail with an
    IndexError.

    Args:
        price_scaler: Fitted scaler-like object with ``min_`` and
            ``scale_`` attributes of length >= 4.
        combined_dataset_prediction: Array-like of shape ``(n, 2)`` holding
            the scaled predictions.

    Returns:
        ``numpy.ndarray`` (float64) of the same shape in original units.
    """
    # Positions of the predicted columns within the fitted price columns.
    selected = [0, 3]
    offsets = np.asarray(price_scaler.min_, dtype=float)[selected]
    factors = np.asarray(price_scaler.scale_, dtype=float)[selected]

    scaled = np.asarray(combined_dataset_prediction, dtype=float)
    combined_dataset_prediction_inverse = (scaled - offsets) / factors

    return combined_dataset_prediction_inverse
137
 
138
+
139
+ def storing_predictions(df, dates, stock, combined_dataset_prediction_inverse):
140
 
141
  df['pred_open'] = np.nan
142
 
 
150
 
151
  for i in range(len(dates)):
152
 
 
153
  if current_row_date == dates[i] and stock[i] == current_row_ticker:
154
 
155
  opening_price = combined_dataset_prediction_inverse[i][0]
 
162
 
163
  return df
164
 
165
+
166
def scrape_news(ticker_name, timeout=10):
    """Scrape two pages of Business Insider headlines for a ticker.

    Args:
        ticker_name: Symbol interpolated into the news URL.
        timeout: Seconds to wait for each HTTP request. Without a timeout
            ``requests.get`` can block indefinitely on a stalled server.

    Returns:
        DataFrame with the columns ``datatime``, ``title``, ``source``,
        ``link``, ``top_sentiment`` (empty string) and ``sentiment_score``
        (0). Rows appear in reverse scraping order, i.e. the article seen
        last is the first row, as before. The frame is empty when no
        article is found.

    Raises:
        requests.RequestException: If a request fails or times out.
    """
    # NOTE: 'datatime' (sic) is the column name other code in this file
    # reads, so the spelling is kept.
    columns = ['datatime', 'title', 'source',
               'link', 'top_sentiment', 'sentiment_score']
    rows = []

    for page in range(1, 3):
        url = f'https://markets.businessinsider.com/news/{ticker_name}-stock?p={page}'
        response = requests.get(url, timeout=timeout)
        soup = BeautifulSoup(response.text, 'lxml')

        for article in soup.find_all('div', class_='latest-news__story'):
            time_tag = article.find('time', class_='latest-news__date')
            link_tag = article.find('a', class_='news-link')
            source_tag = article.find('span', class_='latest-news__source')

            # Skip a story with unexpected markup instead of letting one
            # missing tag abort the whole scrape with an AttributeError.
            if time_tag is None or link_tag is None or source_tag is None:
                continue

            rows.append([
                time_tag.get('datetime'),
                link_tag.text,
                source_tag.text,
                link_tag.get('href'),
                '',  # top_sentiment placeholder
                0,   # sentiment_score placeholder
            ])

    # The previous version prepended each row; reversing once reproduces
    # that ordering while building the frame a single time.
    rows.reverse()
    return pd.DataFrame(rows, columns=columns)
201
 
202
+
203
+ def add_recent_news(main_df, news_df, lookback_days=10):
204
+
205
  news_df.drop(columns=['top_sentiment', 'sentiment_score'], inplace=True)
206
 
 
207
  main_df['date'] = pd.to_datetime(main_df['date'])
208
  news_df['datatime'] = pd.to_datetime(news_df['datatime'])
209
 
 
210
  news_list = []
211
  last_available_news = ''
212
 
 
215
  current_ticker = row['ticker']
216
  news_articles = ''
217
 
 
218
  for _, news_row in news_df.iterrows():
219
  extracted_date = news_row['datatime']
220
 
 
221
  if (current_date - extracted_date).days <= lookback_days and extracted_date < current_date:
222
+ news_articles += news_row['title'] + " "
223
 
 
224
  if not news_articles.strip():
225
  for _, news_row in news_df[::-1].iterrows():
226
  if news_row['datatime'] < current_date:
227
  news_articles = news_row['title']
228
  break
229
 
 
230
  last_available_news = news_articles.strip() or last_available_news
231
  news_list.append(last_available_news)
232
 
 
233
  main_df['news'] = news_list
234
 
 
235
  return main_df
236
 
237
+
238
def news_sentiment(df):
    """Classify the ``news`` text of every row and attach a sentiment score.

    The module-level ``tokenizer`` and ``sentiment_model`` are run over the
    non-null entries of ``df['news']``. Two columns are written onto the
    passed frame: ``predicted_sentiment`` ("negative" / "neutral" /
    "positive") and ``sentiment_score`` (-1 / 0 / 1). Rows whose news is
    null get NaN in both columns.

    Previously the texts were taken from all rows while the result index
    covered only non-null rows, so a single null headline produced a
    length mismatch.

    Args:
        df: Frame with a ``news`` column.

    Returns:
        A copy of ``df`` without the ``news`` column.
    """
    news_column_name = 'news'
    # Position i is the label of class index i.
    # NOTE(review): assumes this order matches the model's class ids - confirm.
    labels = ["negative", "neutral", "positive"]
    sentiment_map = {
        'positive': 1,
        'neutral': 0,
        'negative': -1
    }

    has_news = df[news_column_name].notna()
    texts = df.loc[has_news, news_column_name].tolist()

    predicted = []
    if texts:
        inputs = tokenizer(texts, padding=True,
                           truncation=True, return_tensors="pt")

        # Inference only; no gradients needed.
        with torch.no_grad():
            outputs = sentiment_model(**inputs)

        probs = torch.softmax(outputs.logits, dim=-1)
        predictions = torch.argmax(probs, dim=-1)
        predicted = [labels[pred] for pred in predictions.tolist()]

    # Texts and index come from the same mask, so lengths always agree.
    df['predicted_sentiment'] = pd.Series(
        predicted, index=df.index[has_news], dtype=object)
    df['sentiment_score'] = df['predicted_sentiment'].map(sentiment_map)

    df = df.drop(columns=['news'])

    return df
270
 
271
+
272
  def get_tft_predictions(df):
273
  for i in range(1, 21):
274
  df[f'open_lag_{i}'] = df.groupby('ticker')['open'].shift(i)
275
  df[f'adjclose_lag_{i}'] = df.groupby('ticker')['adjclose'].shift(i)
276
 
277
+ lag_columns = [f'open_lag_{i}' for i in range(
278
+ 1, 21)] + [f'adjclose_lag_{i}' for i in range(1, 21)]
279
 
280
  df.dropna(subset=lag_columns, inplace=True)
281
 
 
283
 
284
  return predictions
285
 
286
+
287
  @app.post("/fetch-ticker-data/")
288
  async def fetch_ticker_data(request: TickerRequest):
289
+ try:
290
+ result_df = fetch_and_process_ticker_data(
291
+ ticker=request.ticker,
292
+ start_date=request.start_date,
293
+ end_date=request.end_date,
294
+ interval=request.interval
295
+ )
296
+ return result_df.to_dict(orient="records")
297
+ except Exception as e:
298
+ raise HTTPException(status_code=500, detail=str(e))
299
+
300
 
301
  @app.post("/predict-prices/")
302
  async def predict_prices(request: TickerRequest):
 
308
  interval=request.interval
309
  )
310
 
 
311
  raw_data = raw_data.tail(60)
312
+ raw_data = raw_data.reset_index()
 
313
 
314
  raw_data.rename(columns={"index": "date"}, inplace=True)
315
  raw_data = ticker_encoded(raw_data)
316
 
317
+ temp_df = raw_data.copy()
318
 
319
  normalized_data, scaler = normalize(raw_data)
320
  normalized_data = normalized_data.drop(columns=['ticker'])
321
 
322
  sequences, _, dates, stock = create_sequence(normalized_data)
323
  combined_dataset_prediction = model.predict(sequences)
324
+ combined_dataset_prediction_inverse = scaling_predictions(
325
+ scaler, combined_dataset_prediction)
326
 
327
+ lstm_pred_df = storing_predictions(
328
+ temp_df, dates, stock, combined_dataset_prediction_inverse)
329
+ news_df = scrape_news(ticker_name=request.ticker)
330
 
331
+ combined_with_news_df = add_recent_news(lstm_pred_df, news_df)
 
 
 
332
  sentiment_df = news_sentiment(combined_with_news_df)
333
+
334
  sentiment_df['time_idx'] = range(1, len(sentiment_df) + 1)
335
 
336
  predicted_values = get_tft_predictions(sentiment_df)
337
 
338
+ final_pred_open_price = predicted_values[0].item()
339
+ final_pred_closing_price = predicted_values[1].item()
340
 
341
+ return {"open": final_pred_open_price, 'close': final_pred_closing_price}
342
 
343
  except Exception as e:
344
  raise HTTPException(status_code=500, detail=str(e))
345
+
346
 
347
  @app.get("/query-rag/{user_query}")
348
+ def query_rag(user_query: str):
349
 
350
  response = query_engine.query(user_query)
351
 
352
+ return {'message': response}
requirements.txt CHANGED
@@ -15,3 +15,4 @@ llama-index-core
15
  llama-index-embeddings-huggingface
16
  python-dotenv
17
  llama-index-llms-huggingface-api
 
 
15
  llama-index-embeddings-huggingface
16
  python-dotenv
17
  llama-index-llms-huggingface-api
18
+ uvicorn