import yfinance as yf import os import pandas as pd import requests import json import datetime #calculating columns how many days to expiration dates def daysleft(inputdate): import datetime previous_date = datetime.datetime.strptime(inputdate, '%Y-%m-%d') today = datetime.datetime.today() ndays = (previous_date - today).days +1 return ndays #get All Option Chain for a ticker, calculate additional columns and retun as a dataframe from Yahoo finance def get_DF_optionchain(ticker): import requests_cache session = requests_cache.CachedSession('yfinance.cache') session.headers['User-agent'] = 'my-program/1.0' # ticker = yf.Ticker('msft', session=session) tkobj = yf.Ticker(ticker, session=session) exps = tkobj.options # Current Price in Market Session # lastprice = tkobj.info['currentPrice'] lastprice = tkobj.history().iloc[-1]['Close'] #base price for additional column bias_price = 'mark' # bias_price = 'lastPrice' options = pd.DataFrame() for e in exps: try: print(e) opt = tkobj.option_chain(e) # opt_calls = pd.DataFrame().append(opt.calls) opt_calls = opt.calls opt_calls['Type'] = 'CALL' # opt_puts = pd.DataFrame().append(opt.puts) opt_puts = opt.puts opt_puts['Type'] = 'PUT' opt = opt_calls._append(opt_puts) opt['expirationDate'] = e opt['daysleft'] = daysleft(e) opt[['bid', 'ask', 'strike']] = opt[['bid', 'ask', 'strike']].apply(pd.to_numeric) opt['mark'] = (opt['bid'] + opt['ask']) / 2 opt['pricepercent'] = 100 * opt['mark'] / lastprice opt[['bid', 'ask', 'strike']] = opt[['bid', 'ask', 'strike']].apply(pd.to_numeric) opt['mark'] = (opt['bid'] + opt['ask']) / 2 opt['interinsicvalue'] = opt.apply(lambda row: abs(lastprice - row['strike']) if row['inTheMoney'] else 0, axis=1) opt['timevalue'] = opt.apply(lambda row: row[bias_price] - row['interinsicvalue'], axis=1) opt['breakevenprice'] = opt.apply(lambda row: row[bias_price]+ row['strike'] if row['Type']=='CALL' else row['strike'] - row['lastPrice'], axis=1) options = options._append(opt, ignore_index=True) except: print('ERROR') pass return options pathdatafolder = os.path.join('data' , datetime.datetime.today().strftime("%Y%m%d")) if not os.path.exists(fr"/app/{pathdatafolder}"): os.mkdir(fr"/app/{pathdatafolder}") tickers = ['TSLA', 'META', 'GOOG', 'IBM', 'MSFT','NKE','DLTR','DG'] for ticker in tickers: print(ticker) try: options = get_DF_optionchain(ticker) options = options.drop(['contractSize','currency'] , axis=1) options.to_csv(fr"/app/{pathdatafolder}/{ticker}.csv",index=False) except: pass # df = pd.read_csv(r"\\PERSIA\New Volume\storage\premarket\america_2022-04-14.csv") # df2 = df.query("`Market Capitalization`>10e9") # import timeit # start = timeit.default_timer() # for index,row in df2.iterrows(): # ticker = row.Ticker # print(ticker) # try: # options = get_DF_optionchain(ticker) # options.to_csv(fr"./{pathdatafolder}/{ticker}.csv") # except: # pass # stop = timeit.default_timer() # print('Time: ', stop - start) # import os # import glob # DFtotal = pd.DataFrame() # for csvfile in glob.glob(r'./data/*.csv'): # DF = pd.read_csv(csvfile) # DF['Ticker'] = os.path.basename(csvfile).split('.')[0] # DFtotal = DFtotal.append(DF) # DFtotal.shape # DFtotal.columns # DFtotal.sort_values(by='impliedVolatility')[-10:].T # DFtotal.sort_values(by='impliedVolatility')[-10:].T # DFtotal.query('volume>100').sort_values(by='impliedVolatility')[-10:].T # DFtotal.query('daysleft>7 & daysleft<60 & not inTheMoney & Type=="CALL" & openInterest>1000').sort_values(by='pricepercent')[-5:].T # dftest = DFtotal.query('daysleft>7 & daysleft<60 & inTheMoney!=1 & Type=="CALL" &openInterest>100').sort_values(by='pricepercent') # dftest['lasttradedate']=dftest['lastTradeDate'].str.split(' ', 0, expand=False).str[0] # dftest.to_csv('test.csv') # # dftest['lasttradedate']=pd.to_datetime(dftest['lastTradeDate']) # dftest['lasttradedate2']=pd.to_datetime(dftest['lasttradedate'],format='%Y-%m-%d') # dftest['year'] = pd.DatetimeIndex(dftest['lasttradedate']).year # dftest['month'] = pd.DatetimeIndex(dftest['lasttradedate']).month # dftest['day'] = pd.DatetimeIndex(dftest['lasttradedate']).day # dftest.info() # dftest.query('year==2022 & month==4 & day==14').sort_values(by='pricepercent') # [-5:].T # # dftest.query('lasttradedate>2022-04-14') # df = pd.read_csv(r"./{pathdatafolder}/AAPL.csv") # df.columns # df.sort_values(by='impliedVolatility')[-5:].T # df.query('impliedVolatility>1').sort_values(by='openInterest')[-5:].T # df.query('daysleft>7 & daysleft<60 & not inTheMoney & Type=="CALL" & openInterest>10000').sort_values(by='bid')[-5:].T