Spaces:
Runtime error
Runtime error
| from bs4 import BeautifulSoup | |
| import requests | |
| import pandas as pd | |
| import itertools | |
| import yfinance as yf | |
| import hopsworks | |
| from datetime import datetime, timedelta | |
| ## Fetch stock price data from Yahoo Finance | |
def get_stock_price(ticker, start_date, end_date):
    """Download price history for ``ticker`` from Yahoo Finance.

    Args:
        ticker: Yahoo Finance symbol. 'AMZN' (and the legacy spelling
            'AMAZ') is labelled 'AMAZON', 'META' is labelled 'META', and
            any other symbol is labelled 'APPLE'.
        start_date: Passed to ``yf.download`` as ``start``.
        end_date: Passed to ``yf.download`` as ``end``.

    Returns:
        A DataFrame with lower-cased column names, a leading ``name``
        column holding the company label, and a timezone-naive ``date``
        column.
    """
    # The rest of this file identifies Amazon as 'AMZN'; the previous check
    # only matched 'AMAZ', so Amazon rows were labelled 'APPLE'. The old
    # spelling is still accepted for backward compatibility.
    company_by_ticker = {
        'AMZN': 'AMAZON',
        'AMAZ': 'AMAZON',
        'META': 'META',
    }
    company = company_by_ticker.get(ticker, 'APPLE')

    stock_df = yf.download(ticker, start=start_date, end=end_date)
    stock_df = stock_df.reset_index(level=0)
    # NOTE(review): recent yfinance releases may return two-level
    # (field, ticker) columns even for a single ticker; keep only the field
    # level so the string operations below work -- confirm against the
    # installed yfinance version.
    if isinstance(stock_df.columns, pd.MultiIndex):
        stock_df.columns = stock_df.columns.get_level_values(0)
    stock_df.columns = stock_df.columns.str.lower()
    stock_df = stock_df.rename(columns={'adj close': 'adj_close'})
    stock_df.insert(0, 'name', company)
    stock_df['date'] = pd.to_datetime(stock_df['date']).dt.tz_localize(None)
    return stock_df
| ## Fetch stock prices from hopsworks | |
def time_2_datetime(x):
    """Convert a timestamp given in milliseconds to a ``datetime``.

    The value is divided by 1000 and handed to ``datetime.fromtimestamp``,
    so the result is a naive datetime in the local timezone.
    """
    seconds = x / 1000
    return datetime.fromtimestamp(seconds)
def get_stock_price_from_hopsworks(name):
    """Read one stock-price row for a company from the Hopsworks feature store.

    Logs in to Hopsworks, reads the whole ``stocks_fg`` feature group
    (version 1), keeps the rows whose ``name`` equals ``name.upper()`` and
    converts their ``date`` values with ``time_2_datetime``.

    Args:
        name: Company name; matched case-insensitively on the caller's side
            by upper-casing it before comparison.

    Returns:
        A one-row DataFrame (empty if nothing matches). Rows are sorted by
        ``date`` in ascending order before ``head(1)``, so this is the
        earliest matching row.
    """
    project = hopsworks.login()
    fs = project.get_feature_store()
    stock_fg = fs.get_feature_group(name="stocks_fg", version=1)
    stock_df = stock_fg.select_all().read()

    # Take an explicit copy of the filtered rows so the column assignment
    # below does not write into a slice of the frame that was read
    # (avoids pandas' SettingWithCopyWarning).
    stock_df = stock_df.loc[stock_df['name'] == name.upper()].copy()
    stock_df['date'] = stock_df['date'].apply(time_2_datetime)
    stock_df = stock_df.sort_values(by='date')
    return stock_df.head(1)
| ## Scrape stock news from investing.com | |
def get_articles_urls(company, startpage, endpage):
    """Collect article URLs from a company's news listing on investing.com.

    Args:
        company: Slug inserted into the listing URL
            (``.../equities/{company}-inc-news/{page}``).
        startpage: First listing page to fetch (inclusive).
        endpage: Listing page to stop at (exclusive).

    Returns:
        A list of article URLs in page order, with promotional
        ``/pro/offers`` links removed. Empty when the page range is empty.
    """
    base_url = 'https://www.investing.com/'
    # The doubled slash is intentional: it is what a promo link looks like
    # after being joined onto ``base_url`` (presumably because the hrefs
    # start with '/').
    promo_prefix = 'https://www.investing.com//pro/offers'

    urls = []
    for page in range(startpage, endpage):
        if page % 100 == 0:
            print(page)  # coarse progress indicator
        listing_url = f"https://www.investing.com/equities/{company}-inc-news/{page}"
        # Keep the response in its own name instead of overwriting the loop
        # variable ``page``.
        response = requests.get(listing_url)
        soup = BeautifulSoup(response.text, 'html.parser')

        sections = soup.find_all('div', attrs={'class': 'mediumTitle1'})
        if len(sections) < 2:
            # The second 'mediumTitle1' div is the one that is read; skip
            # pages that do not have it rather than raising IndexError.
            continue
        for article in sections[1].find_all('article'):
            link = article.find('a')
            if link is None or not link.get('href'):
                continue  # article entry without a usable link
            urls.append(base_url + link['href'])

    return [url for url in urls if not url.startswith(promo_prefix)]
def scrape_news(urls, df, company):
    """Scrape title, date and body text from article pages and add them to ``df``.

    Args:
        urls: Iterable of article URLs to fetch.
        df: DataFrame the scraped rows are appended to; it is not modified.
        company: Value stored in the ``ticker`` column of every scraped row.

    Returns:
        ``df`` itself when nothing was scraped, otherwise a new DataFrame
        made of ``df`` followed by one row per scraped article with the
        columns ``ticker``, ``publish_date``, ``title``, ``body_text`` and
        ``url``. Each added row carries index label 0, as before.
    """
    scraped = []
    for url in urls:
        page = requests.get(url)
        soup = BeautifulSoup(page.text, 'html.parser')

        header = soup.find('h1', attrs={'class': 'articleHeader'})
        details = soup.find('div', attrs={'class': 'contentSectionDetails'})
        body = soup.find('div', attrs={'class': 'WYSIWYG articlePage'})
        date_span = details.find("span") if details is not None else None
        if header is None or date_span is None or body is None:
            # Not a regular article page: report the URL and move on
            # instead of failing on a missing element.
            print(url)
            continue

        title = header.text.strip()
        publish_date = date_span.text.strip()
        paragraphs = [p.get_text() for p in body.find_all("p")]
        article = ' '.join(paragraphs).replace('Position added successfully to:', '').strip()

        row = {'ticker': company, 'publish_date': publish_date, 'title': title,
               'body_text': article, 'url': url}
        scraped.append(pd.DataFrame(row, index=[0]))

    if not scraped:
        return df
    # DataFrame.append was removed in pandas 2.0; a single concat gives the
    # same result and avoids re-copying the frame for every article.
    return pd.concat([df, *scraped])
| ## Fetch stock news from hopsworks | |
def get_news_from_hopsworks():
    """Read all rows of the ``market_news_fg_for_three`` feature group.

    Logs in to Hopsworks, opens the project's feature store and returns the
    result of reading a select-all query on version 1 of the feature group.
    """
    feature_store = hopsworks.login().get_feature_store()
    news_fg = feature_store.get_feature_group(
        name="market_news_fg_for_three",
        version=1,
    )
    # NOTE: an earlier, disabled variant read through a "market_news"
    # feature view built on "market_news_fg"; the feature group is read
    # directly instead.
    return news_fg.select_all().read()
| ## Fetch history prediction plot | |
def get_history_plot_from_hopsworks(ticker):
    """Download the stored prediction plot for ``ticker`` from Hopsworks.

    'AAPL' selects the Apple image and 'AMZN' the Amazon image; every other
    value selects the Meta image. The file is fetched through the project's
    dataset API with ``overwrite=True``. Nothing is returned.
    """
    dataset_api = hopsworks.login().get_dataset_api()

    # Start from the fallback and override it for the two named tickers.
    remote_path = "Resources/images/meta_stock_prediction.png"
    if ticker == 'AAPL':
        remote_path = "Resources/images/apple_stock_prediction.png"
    elif ticker == 'AMZN':
        remote_path = "Resources/images/amazon_stock_prediction.png"

    dataset_api.download(remote_path, overwrite=True)
| ## Formalize the date column | |
def remove_parentheses(s):
    """Return the text between the first '(' in ``s`` and the next ')'.

    Args:
        s: Input string.

    Returns:
        The parenthesised text, or ``s`` unchanged when it has no '(' or no
        ')' after that '('.
    """
    opening = s.find('(')
    if opening == -1:
        return s
    # Search for the closing parenthesis only after the opening one. A plain
    # ``s.find(')')`` returns -1 when it is missing (which silently dropped
    # the last character) and can match a ')' that precedes the '('.
    closing = s.find(')', opening + 1)
    if closing == -1:
        return s
    return s[opening + 1:closing]
def change_date_format(df):
    """Parse the ``publish_date`` column of ``df`` into datetimes, in place.

    Nothing happens unless the column has ``object`` dtype. In that case each
    value is first passed through ``remove_parentheses`` and the column is
    then parsed with the format ``'%b %d, %Y %I:%M%p ET'``.

    Returns:
        The same ``df`` object that was passed in.
    """
    if df['publish_date'].dtype != object:
        return df

    # Two separate writes: the cleaned strings are stored first, then parsed.
    df['publish_date'] = df['publish_date'].apply(remove_parentheses)
    df['publish_date'] = pd.to_datetime(
        df['publish_date'],
        format='%b %d, %Y %I:%M%p ET',
    )
    return df
def select_oneday_news(df, day):
    """Return the rows of ``df`` whose ``publish_date`` falls on ``day``.

    Args:
        df: News DataFrame with a ``publish_date`` column; it is left
            untouched.
        day: Object with a ``.date()`` method (e.g. a ``datetime``); rows are
            kept when their publish date equals ``day.date()``.

    Returns:
        A new DataFrame with the matching rows and no ``date`` column.
    """
    # Parse the dates on a copy and keep them in a separate Series. The
    # previous code stored them in a temporary 'date' column on ``df``
    # itself, which leaked that column into the caller's frame.
    parsed = change_date_format(df.copy())['publish_date']
    same_day = (parsed.dt.date == day.date()).to_numpy()

    # The result never contained a 'date' column (the temporary one was
    # dropped before returning), so keep that output shape.
    return df.loc[same_day].drop(columns='date', errors='ignore')