Spaces:
Runtime error
Runtime error
Initial commit
Browse files- app.py +124 -0
- requirements.txt +1 -0
app.py
ADDED
|
@@ -0,0 +1,124 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Demo is based on https://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html
|
| 3 |
+
"""
|
| 4 |
+
|
| 5 |
+
import sys
|
| 6 |
+
import numpy as np
|
| 7 |
+
import pandas as pd
|
| 8 |
+
|
| 9 |
+
# Ticker symbol -> company display name for every stock used in the demo.
# The keys are substituted into the CSV download URL further down, and the
# values are what the UI prints for each cluster.
symbol_dict = {
    "TOT": "Total",
    "XOM": "Exxon",
    "CVX": "Chevron",
    "COP": "ConocoPhillips",
    "VLO": "Valero Energy",
    "MSFT": "Microsoft",
    "IBM": "IBM",
    "TWX": "Time Warner",
    "CMCSA": "Comcast",
    "CVC": "Cablevision",
    "YHOO": "Yahoo",
    "DELL": "Dell",
    "HPQ": "HP",
    "AMZN": "Amazon",
    "TM": "Toyota",
    "CAJ": "Canon",
    "SNE": "Sony",
    "F": "Ford",
    "HMC": "Honda",
    "NAV": "Navistar",
    "NOC": "Northrop Grumman",
    "BA": "Boeing",
    "KO": "Coca Cola",
    "MMM": "3M",
    "MCD": "McDonald's",
    "PEP": "Pepsi",
    "K": "Kellogg",
    "UN": "Unilever",
    "MAR": "Marriott",
    "PG": "Procter Gamble",
    "CL": "Colgate-Palmolive",
    "GE": "General Electrics",
    "WFC": "Wells Fargo",
    "JPM": "JPMorgan Chase",
    "AIG": "AIG",
    "AXP": "American express",
    "BAC": "Bank of America",
    "GS": "Goldman Sachs",
    "AAPL": "Apple",
    "SAP": "SAP",
    "CSCO": "Cisco",
    "TXN": "Texas Instruments",
    "XRX": "Xerox",
    "WMT": "Wal-Mart",
    "HD": "Home Depot",
    "GSK": "GlaxoSmithKline",
    "PFE": "Pfizer",
    "SNY": "Sanofi-Aventis",
    "NVS": "Novartis",
    "KMB": "Kimberly-Clark",
    "R": "Ryder",
    "GD": "General Dynamics",
    "RTN": "Raytheon",
    "CVS": "CVS",
    "CAT": "Caterpillar",
    "DD": "DuPont de Nemours",
}


# Sort the (symbol, name) pairs by symbol, then transpose the resulting 2-D
# string array so that `symbols` and `names` are two parallel 1-D arrays:
# names[i] is the company whose ticker is symbols[i].
symbols, names = np.array(sorted(symbol_dict.items())).T
|
| 70 |
+
|
| 71 |
+
# URL template for the per-symbol CSV files; the ticker replaces `{}`.
# It does not change between iterations, so it is built once, before the loop.
url = (
    "https://raw.githubusercontent.com/scikit-learn/examples-data/"
    "master/financial-data/{}.csv"
)

# Download one DataFrame per ticker, in the same order as `symbols`.
quotes = []
for symbol in symbols:
    # Progress goes to stderr so it does not mix with regular output.
    print("Fetching quote history for %r" % symbol, file=sys.stderr)
    quotes.append(pd.read_csv(url.format(symbol)))

# Stack the per-symbol "close" / "open" columns into 2-D arrays with one row
# per symbol. np.vstack requires every column to have the same length.
close_prices = np.vstack([q["close"] for q in quotes])
open_prices = np.vstack([q["open"] for q in quotes])

# The daily variations of the quotes are what carry the most information
variation = close_prices - open_prices
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
from sklearn import covariance

# Grid of 10 log-spaced regularization strengths (10**-1.5 .. 10**1) handed
# to the cross-validated graphical lasso estimator.
alphas = np.logspace(-1.5, 1, num=10)
edge_model = covariance.GraphicalLassoCV(alphas=alphas)

# standardize the time series: using correlations rather than covariance
# former is more efficient for structure recovery
# `variation` has one row per symbol; transposing a copy gives one column per
# symbol, and the in-place division leaves `variation` itself untouched.
X = variation.copy().T
X /= X.std(axis=0)
edge_model.fit(X)
|
| 99 |
+
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
from sklearn import cluster

# Cluster the stocks using the fitted covariance matrix as the similarity
# input; the first return value is not needed here. random_state is fixed so
# that repeated runs produce the same grouping.
_, labels = cluster.affinity_propagation(edge_model.covariance_, random_state=0)
# Highest cluster index present in `labels` (not the number of clusters).
n_labels = labels.max()
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
|
| 109 |
+
import gradio as gr

title = " π Visualizing the stock market structure π"

# Static page: a heading, a short description, then one Markdown line per
# cluster listing the company names that were assigned to it.
with gr.Blocks(title=title) as demo:
    gr.Markdown(f"# {title}")
    # Stock count is taken from the data instead of being hard-coded, so the
    # text stays correct if the symbol table is edited.
    gr.Markdown(f" Data is of {len(names)} stocks between the period of 2003 - 2008 <br>")
    gr.Markdown(" Stocks that move together are grouped together in a cluster <br>")

    gr.Markdown(" **[Demo is based on sklearn docs](https://scikit-learn.org/stable/auto_examples/applications/plot_stock_market.html)**")

    # Cluster indices run from 0 to n_labels inclusive; show them 1-based.
    for i in range(n_labels + 1):
        gr.Markdown(f"Cluster {i + 1}: {', '.join(names[labels == i])}")

    gr.Markdown("## In progress")

demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
scikit-learn==1.2.1
|