Spaces:

sklearn-docs
/

Visualizing_the_stock_market_structure

Runtime error

App Files Files Community

Visualizing_the_stock_market_structure / app.py

tushifire

Initial commit

0d803eb over 2 years ago

raw

history blame

3.23 kB

	"""
	Demo is based on https://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html
	"""

	import sys
	import numpy as np
	import pandas as pd

	# Ticker symbol -> company display name for every stock used by the demo.
	# The tickers are later substituted into the per-symbol CSV download URL and
	# the display names are what the cluster listing shows.
	symbol_dict = {
	    "TOT": "Total",
	    "XOM": "Exxon",
	    "CVX": "Chevron",
	    "COP": "ConocoPhillips",
	    "VLO": "Valero Energy",
	    "MSFT": "Microsoft",
	    "IBM": "IBM",
	    "TWX": "Time Warner",
	    "CMCSA": "Comcast",
	    "CVC": "Cablevision",
	    "YHOO": "Yahoo",
	    "DELL": "Dell",
	    "HPQ": "HP",
	    "AMZN": "Amazon",
	    "TM": "Toyota",
	    "CAJ": "Canon",
	    "SNE": "Sony",
	    "F": "Ford",
	    "HMC": "Honda",
	    "NAV": "Navistar",
	    "NOC": "Northrop Grumman",
	    "BA": "Boeing",
	    "KO": "Coca Cola",
	    "MMM": "3M",
	    "MCD": "McDonald's",
	    "PEP": "Pepsi",
	    "K": "Kellogg",
	    "UN": "Unilever",
	    "MAR": "Marriott",
	    "PG": "Procter Gamble",
	    "CL": "Colgate-Palmolive",
	    "GE": "General Electrics",
	    "WFC": "Wells Fargo",
	    "JPM": "JPMorgan Chase",
	    "AIG": "AIG",
	    "AXP": "American express",
	    "BAC": "Bank of America",
	    "GS": "Goldman Sachs",
	    "AAPL": "Apple",
	    "SAP": "SAP",
	    "CSCO": "Cisco",
	    "TXN": "Texas Instruments",
	    "XRX": "Xerox",
	    "WMT": "Wal-Mart",
	    "HD": "Home Depot",
	    "GSK": "GlaxoSmithKline",
	    "PFE": "Pfizer",
	    "SNY": "Sanofi-Aventis",
	    "NVS": "Novartis",
	    "KMB": "Kimberly-Clark",
	    "R": "Ryder",
	    "GD": "General Dynamics",
	    "RTN": "Raytheon",
	    "CVS": "CVS",
	    "CAT": "Caterpillar",
	    "DD": "DuPont de Nemours",
	}


	# Sort the (ticker, name) pairs by ticker and transpose the resulting
	# two-column array, giving one array of tickers and one of company names
	# that share the same ordering.
	symbols, names = np.array(sorted(symbol_dict.items())).T

	# The URL template is the same for every symbol, so build it once instead of
	# on every loop iteration; only the ticker placeholder varies.
	url = (
	    "https://raw.githubusercontent.com/scikit-learn/examples-data/"
	    "master/financial-data/{}.csv"
	)

	# Download one CSV per symbol, in `symbols` order, so that row i of the price
	# matrices below belongs to symbols[i] / names[i].
	quotes = []
	for symbol in symbols:
	    # Progress is reported on stderr, leaving stdout untouched.
	    print(f"Fetching quote history for {symbol!r}", file=sys.stderr)
	    quotes.append(pd.read_csv(url.format(symbol)))

	# Stack the "close" and "open" columns into matrices with one row per symbol.
	# np.vstack needs every CSV to contribute the same number of rows.
	close_prices = np.vstack([q["close"] for q in quotes])
	open_prices = np.vstack([q["open"] for q in quotes])

	# The daily variations of the quotes are what carry the most information
	variation = close_prices - open_prices


	from sklearn import covariance

	# Ten candidate values, log-spaced from 10**-1.5 to 10**1, handed to the
	# estimator through its `alphas` argument.
	alphas = np.logspace(-1.5, 1, num=10)

	# Standardize the time series: using correlations rather than covariance
	# is more efficient for structure recovery.
	# After the transpose each column holds one symbol's variations, so dividing
	# by the per-column standard deviation rescales every symbol separately.
	# The copy keeps `variation` itself unmodified by the in-place division.
	X = variation.copy().T
	np.divide(X, X.std(axis=0), out=X)

	edge_model = covariance.GraphicalLassoCV(alphas=alphas)
	edge_model.fit(X)



	from sklearn import cluster

	# Cluster the symbols from the covariance matrix fitted by `edge_model`.
	# Only the second element of the returned pair (the labels) is used; the
	# fixed random_state keeps the call reproducible between runs.
	_, labels = cluster.affinity_propagation(
	    edge_model.covariance_,
	    random_state=0,
	)

	# Largest label value; the cluster listing iterates over range(n_labels + 1).
	n_labels = labels.max()



	import gradio as gr

	title = " 📈 Visualizing the stock market structure 📈"

	# Build the page: every component created inside the `with` block belongs to
	# `demo`. The nesting below restores the block structure that the `with` and
	# `for` statements require in order to parse.
	with gr.Blocks(title=title) as demo:
	    gr.Markdown(f"# {title}")
	    gr.Markdown(" Data is of 56 stocks between the period of 2003 - 2008 <br>")
	    gr.Markdown(" Stocks the move in together with each other are grouped together in a cluster <br>")

	    gr.Markdown(" [Demo is based on sklearn docs](https://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html)")

	    # One Markdown line per cluster. Labels run from 0 to n_labels and are
	    # displayed 1-based; the boolean mask picks the names in cluster i.
	    for i in range(n_labels + 1):
	        gr.Markdown(f"Cluster {i + 1}: {', '.join(names[labels == i])}")

	    # Plain string: there is nothing to interpolate here.
	    gr.Markdown("## In progress")

	# Start serving only after the layout has been fully defined.
	demo.launch()