NetworkX is a Python package for the creation, manipulation, and study of the structure, dynamics, and functions of complex networks.
Using daily adjusted close data from 2020-11-18 to 2020-12-18 for the Dow 30 stocks, we compute correlation coefficients, apply a threshold of 0.8 to find similar stocks, and produce two types of graphs with NetworkX.
To compute correlation coefficients, we read in daily adjusted close data and use Pandas to construct a correlation coefficient matrix.
def correlation_coefficient_matrix(read_data=None):
    """Build, save, and return the correlation coefficient matrix.

    Symbols are taken from the ``Symbol`` column of ``df_dow30.pkl`` and
    processed in alphabetical order. For each symbol the ``Adj_Close`` column
    of the frame returned by ``read_data`` is collected, the pairwise
    correlation of those series is computed with ``DataFrame.corr()`` and
    rounded to 3 decimals, and the result is written to
    ``correlation_coefficient_matrix.csv`` and
    ``correlation_coefficient_matrix.pkl`` in the working directory.

    Args:
        read_data: Callable that takes a symbol and returns a DataFrame with
            an ``Adj_Close`` column. The original listing elides this step
            (it only says ``READ DATA``), so there is no built-in loader and
            the caller must supply one.

    Returns:
        DataFrame whose index and columns are the symbols (alphabetical) and
        whose values are the rounded correlation coefficients.

    Raises:
        NotImplementedError: If ``read_data`` is not supplied.
    """
    if read_data is None:
        raise NotImplementedError(
            'correlation_coefficient_matrix() needs a read_data(symbol) '
            'callable returning a DataFrame with an Adj_Close column'
        )

    # get symbols
    # NOTE(review): read_pickle executes arbitrary code embedded in the file;
    # only load pickles that come from a trusted source.
    df = pd.read_pickle('df_dow30.pkl')

    # sort symbols alphabetically (without mutating the loaded frame)
    array_symbols = df.sort_values(by='Symbol', ascending=True)['Symbol'].values

    # get data, put into dictionary: key=symbol, value=array of adj close
    dict_sym_ac = {
        sym: read_data(sym)['Adj_Close'].values for sym in array_symbols
    }

    # create correlation coeff df; from_dict requires every array to have the
    # same length, i.e. every symbol must cover the same set of trading days
    dfdata = pd.DataFrame.from_dict(dict_sym_ac)
    dfcc = dfdata.corr().round(3)

    dfcc.to_csv('correlation_coefficient_matrix.csv')
    dfcc.to_pickle('correlation_coefficient_matrix.pkl')
    return dfcc
Dow 30 Correlation Coefficient Matrix
| | AAPL | AMGN | AXP | BA | CAT | CRM | CSCO | CVX | DIS | DOW | GS | HD | HON | IBM | INTC | JNJ | JPM | KO | MCD | MMM | MRK | MSFT | NKE | PG | TRV | UNH | V | VZ | WBA | WMT |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
AAPL | 1.0 | 0.817 | 0.322 | 0.561 | 0.643 | -0.803 | 0.837 | -0.12 | 0.763 | -0.455 | 0.716 | -0.236 | 0.628 | 0.563 | 0.736 | 0.832 | 0.237 | 0.606 | -0.409 | 0.006 | 0.367 | 0.662 | 0.798 | -0.412 | 0.525 | 0.507 | 0.228 | 0.123 | 0.637 | -0.734 |
AMGN | 0.817 | 1.0 | 0.239 | 0.552 | 0.669 | -0.76 | 0.704 | -0.112 | 0.641 | -0.364 | 0.604 | -0.474 | 0.633 | 0.419 | 0.653 | 0.855 | 0.085 | 0.693 | -0.59 | -0.105 | 0.469 | 0.351 | 0.668 | -0.529 | 0.489 | 0.607 | 0.1 | 0.178 | 0.708 | -0.807 |
AXP | 0.322 | 0.239 | 1.0 | 0.834 | 0.482 | -0.621 | 0.639 | 0.696 | 0.171 | -0.044 | 0.656 | -0.318 | 0.663 | 0.804 | 0.754 | 0.331 | 0.901 | 0.318 | -0.412 | 0.056 | 0.614 | 0.28 | 0.618 | -0.474 | 0.065 | 0.657 | 0.649 | 0.778 | 0.733 | -0.096 |
BA | 0.561 | 0.552 | 0.834 | 1.0 | 0.718 | -0.843 | 0.81 | 0.601 | 0.5 | -0.189 | 0.806 | -0.618 | 0.857 | 0.78 | 0.858 | 0.624 | 0.722 | 0.637 | -0.651 | 0.064 | 0.716 | 0.245 | 0.814 | -0.791 | 0.31 | 0.624 | 0.349 | 0.608 | 0.936 | -0.542 |
CAT | 0.643 | 0.669 | 0.482 | 0.718 | 1.0 | -0.697 | 0.814 | 0.447 | 0.745 | 0.109 | 0.848 | -0.501 | 0.881 | 0.77 | 0.664 | 0.752 | 0.49 | 0.914 | -0.403 | 0.372 | 0.428 | 0.366 | 0.827 | -0.66 | 0.668 | 0.358 | 0.312 | 0.202 | 0.773 | -0.762 |
CRM | -0.803 | -0.76 | -0.621 | -0.843 | -0.697 | 1.0 | -0.921 | -0.251 | -0.715 | 0.456 | -0.84 | 0.428 | -0.825 | -0.757 | -0.893 | -0.798 | -0.492 | -0.608 | 0.619 | -0.057 | -0.622 | -0.419 | -0.843 | 0.746 | -0.397 | -0.573 | -0.212 | -0.395 | -0.862 | 0.71 |
CSCO | 0.837 | 0.704 | 0.639 | 0.81 | 0.814 | -0.921 | 1.0 | 0.307 | 0.787 | -0.251 | 0.926 | -0.295 | 0.861 | 0.879 | 0.856 | 0.824 | 0.586 | 0.683 | -0.433 | 0.235 | 0.509 | 0.613 | 0.905 | -0.637 | 0.505 | 0.5 | 0.417 | 0.338 | 0.812 | -0.686 |
CVX | -0.12 | -0.112 | 0.696 | 0.601 | 0.447 | -0.251 | 0.307 | 1.0 | 0.129 | 0.42 | 0.525 | -0.286 | 0.592 | 0.605 | 0.383 | 0.052 | 0.792 | 0.323 | -0.193 | 0.432 | 0.37 | -0.054 | 0.371 | -0.528 | 0.215 | 0.097 | 0.286 | 0.375 | 0.433 | -0.031 |
DIS | 0.763 | 0.641 | 0.171 | 0.5 | 0.745 | -0.715 | 0.787 | 0.129 | 1.0 | -0.188 | 0.767 | -0.162 | 0.748 | 0.593 | 0.583 | 0.72 | 0.249 | 0.701 | -0.209 | 0.39 | 0.155 | 0.596 | 0.732 | -0.553 | 0.667 | 0.081 | 0.022 | -0.193 | 0.539 | -0.824 |
DOW | -0.455 | -0.364 | -0.044 | -0.189 | 0.109 | 0.456 | -0.251 | 0.42 | -0.188 | 1.0 | -0.031 | 0.105 | -0.056 | 0.02 | -0.447 | -0.232 | 0.218 | 0.165 | 0.396 | 0.551 | -0.31 | -0.123 | -0.221 | 0.229 | 0.365 | -0.301 | 0.143 | -0.033 | -0.217 | 0.208 |
GS | 0.716 | 0.604 | 0.656 | 0.806 | 0.848 | -0.84 | 0.926 | 0.525 | 0.767 | -0.031 | 1.0 | -0.269 | 0.914 | 0.914 | 0.793 | 0.738 | 0.7 | 0.704 | -0.372 | 0.463 | 0.462 | 0.535 | 0.904 | -0.713 | 0.649 | 0.362 | 0.374 | 0.248 | 0.756 | -0.674 |
HD | -0.236 | -0.474 | -0.318 | -0.618 | -0.501 | 0.428 | -0.295 | -0.286 | -0.162 | 0.105 | -0.269 | 1.0 | -0.461 | -0.144 | -0.372 | -0.48 | -0.124 | -0.584 | 0.844 | 0.321 | -0.714 | 0.292 | -0.374 | 0.571 | -0.155 | -0.504 | 0.084 | -0.407 | -0.675 | 0.506 |
HON | 0.628 | 0.633 | 0.663 | 0.857 | 0.881 | -0.825 | 0.861 | 0.592 | 0.748 | -0.056 | 0.914 | -0.461 | 1.0 | 0.853 | 0.801 | 0.716 | 0.652 | 0.789 | -0.495 | 0.374 | 0.571 | 0.381 | 0.876 | -0.79 | 0.537 | 0.368 | 0.303 | 0.292 | 0.832 | -0.664 |
IBM | 0.563 | 0.419 | 0.804 | 0.78 | 0.77 | -0.757 | 0.879 | 0.605 | 0.593 | 0.02 | 0.914 | -0.144 | 0.853 | 1.0 | 0.795 | 0.552 | 0.838 | 0.562 | -0.225 | 0.459 | 0.426 | 0.554 | 0.828 | -0.578 | 0.407 | 0.393 | 0.599 | 0.431 | 0.705 | -0.405 |
INTC | 0.736 | 0.653 | 0.754 | 0.858 | 0.664 | -0.893 | 0.856 | 0.383 | 0.583 | -0.447 | 0.793 | -0.372 | 0.801 | 0.795 | 1.0 | 0.637 | 0.618 | 0.537 | -0.527 | 0.007 | 0.61 | 0.461 | 0.835 | -0.708 | 0.253 | 0.626 | 0.355 | 0.455 | 0.845 | -0.524 |
JNJ | 0.832 | 0.855 | 0.331 | 0.624 | 0.752 | -0.798 | 0.824 | 0.052 | 0.72 | -0.232 | 0.738 | -0.48 | 0.716 | 0.552 | 0.637 | 1.0 | 0.193 | 0.754 | -0.654 | 0.071 | 0.603 | 0.381 | 0.779 | -0.548 | 0.585 | 0.496 | 0.133 | 0.21 | 0.737 | -0.774 |
JPM | 0.237 | 0.085 | 0.901 | 0.722 | 0.49 | -0.492 | 0.586 | 0.792 | 0.249 | 0.218 | 0.7 | -0.124 | 0.652 | 0.838 | 0.618 | 0.193 | 1.0 | 0.327 | -0.131 | 0.365 | 0.364 | 0.394 | 0.581 | -0.399 | 0.247 | 0.419 | 0.638 | 0.604 | 0.561 | -0.082 |
KO | 0.606 | 0.693 | 0.318 | 0.637 | 0.914 | -0.608 | 0.683 | 0.323 | 0.701 | 0.165 | 0.704 | -0.584 | 0.789 | 0.562 | 0.537 | 0.754 | 0.327 | 1.0 | -0.431 | 0.302 | 0.364 | 0.28 | 0.701 | -0.595 | 0.741 | 0.328 | 0.135 | 0.169 | 0.757 | -0.779 |
MCD | -0.409 | -0.59 | -0.412 | -0.651 | -0.403 | 0.619 | -0.433 | -0.193 | -0.209 | 0.396 | -0.372 | 0.844 | -0.495 | -0.225 | -0.527 | -0.654 | -0.131 | -0.431 | 1.0 | 0.367 | -0.883 | 0.17 | -0.463 | 0.603 | -0.079 | -0.615 | 0.085 | -0.464 | -0.697 | 0.472 |
MMM | 0.006 | -0.105 | 0.056 | 0.064 | 0.372 | -0.057 | 0.235 | 0.432 | 0.39 | 0.551 | 0.463 | 0.321 | 0.374 | 0.459 | 0.007 | 0.071 | 0.365 | 0.302 | 0.367 | 1.0 | -0.292 | 0.326 | 0.299 | -0.171 | 0.559 | -0.459 | 0.088 | -0.269 | -0.01 | -0.157 |
MRK | 0.367 | 0.469 | 0.614 | 0.716 | 0.428 | -0.622 | 0.509 | 0.37 | 0.155 | -0.31 | 0.462 | -0.714 | 0.571 | 0.426 | 0.61 | 0.603 | 0.364 | 0.364 | -0.883 | -0.292 | 1.0 | -0.155 | 0.546 | -0.612 | -0.019 | 0.606 | 0.131 | 0.603 | 0.671 | -0.276 |
MSFT | 0.662 | 0.351 | 0.28 | 0.245 | 0.366 | -0.419 | 0.613 | -0.054 | 0.596 | -0.123 | 0.535 | 0.292 | 0.381 | 0.554 | 0.461 | 0.381 | 0.394 | 0.28 | 0.17 | 0.326 | -0.155 | 1.0 | 0.515 | 0.057 | 0.382 | 0.218 | 0.536 | -0.025 | 0.258 | -0.307 |
NKE | 0.798 | 0.668 | 0.618 | 0.814 | 0.827 | -0.843 | 0.905 | 0.371 | 0.732 | -0.221 | 0.904 | -0.374 | 0.876 | 0.828 | 0.835 | 0.779 | 0.581 | 0.701 | -0.463 | 0.299 | 0.546 | 0.515 | 1.0 | -0.646 | 0.533 | 0.467 | 0.386 | 0.299 | 0.798 | -0.687 |
PG | -0.412 | -0.529 | -0.474 | -0.791 | -0.66 | 0.746 | -0.637 | -0.528 | -0.553 | 0.229 | -0.713 | 0.571 | -0.79 | -0.578 | -0.708 | -0.548 | -0.399 | -0.595 | 0.603 | -0.171 | -0.612 | 0.057 | -0.646 | 1.0 | -0.344 | -0.227 | 0.146 | -0.187 | -0.737 | 0.645 |
TRV | 0.525 | 0.489 | 0.065 | 0.31 | 0.668 | -0.397 | 0.505 | 0.215 | 0.667 | 0.365 | 0.649 | -0.155 | 0.537 | 0.407 | 0.253 | 0.585 | 0.247 | 0.741 | -0.079 | 0.559 | -0.019 | 0.382 | 0.533 | -0.344 | 1.0 | 0.037 | -0.011 | -0.167 | 0.392 | -0.715 |
UNH | 0.507 | 0.607 | 0.657 | 0.624 | 0.358 | -0.573 | 0.5 | 0.097 | 0.081 | -0.301 | 0.362 | -0.504 | 0.368 | 0.393 | 0.626 | 0.496 | 0.419 | 0.328 | -0.615 | -0.459 | 0.606 | 0.218 | 0.467 | -0.227 | 0.037 | 1.0 | 0.478 | 0.795 | 0.705 | -0.283 |
V | 0.228 | 0.1 | 0.649 | 0.349 | 0.312 | -0.212 | 0.417 | 0.286 | 0.022 | 0.143 | 0.374 | 0.084 | 0.303 | 0.599 | 0.355 | 0.133 | 0.638 | 0.135 | 0.085 | 0.088 | 0.131 | 0.536 | 0.386 | 0.146 | -0.011 | 0.478 | 1.0 | 0.499 | 0.309 | 0.127 |
VZ | 0.123 | 0.178 | 0.778 | 0.608 | 0.202 | -0.395 | 0.338 | 0.375 | -0.193 | -0.033 | 0.248 | -0.407 | 0.292 | 0.431 | 0.455 | 0.21 | 0.604 | 0.169 | -0.464 | -0.269 | 0.603 | -0.025 | 0.299 | -0.187 | -0.167 | 0.795 | 0.499 | 1.0 | 0.582 | 0.094 |
WBA | 0.637 | 0.708 | 0.733 | 0.936 | 0.773 | -0.862 | 0.812 | 0.433 | 0.539 | -0.217 | 0.756 | -0.675 | 0.832 | 0.705 | 0.845 | 0.737 | 0.561 | 0.757 | -0.697 | -0.01 | 0.671 | 0.258 | 0.798 | -0.737 | 0.392 | 0.705 | 0.309 | 0.582 | 1.0 | -0.657 |
WMT | -0.734 | -0.807 | -0.096 | -0.542 | -0.762 | 0.71 | -0.686 | -0.031 | -0.824 | 0.208 | -0.674 | 0.506 | -0.664 | -0.405 | -0.524 | -0.774 | -0.082 | -0.779 | 0.472 | -0.157 | -0.276 | -0.307 | -0.687 | 0.645 | -0.715 | -0.283 | 0.127 | 0.094 | -0.657 | 1.0 |
We then create a new DataFrame for use in NetworkX. It has three columns: 'from' and 'to', which hold the stock symbols (graph nodes) at the two ends of an undirected edge, and 'corr coeff', which holds the edge weight (the correlation coefficient). Only pairs with a correlation coefficient > 0.8 are included as edges.
We draw the resulting graph using two layouts, circular and spring, then save the plots. As we are only interested in displaying correlation coefficient information, the choice of layouts was dictated by a desire to show similar stocks and clusters of such stocks.
import pandas as pd
import networkx
import matplotlib.pyplot as plt

if __name__ == '__main__':
    # Load the cached correlation matrix; recompute only when no cache file
    # exists. Catching FileNotFoundError (instead of a bare except) lets real
    # failures and Ctrl-C surface rather than silently triggering a rebuild.
    # NOTE(review): read_pickle executes arbitrary code embedded in the file;
    # only load pickles that come from a trusted source.
    try:
        df_cc = pd.read_pickle('correlation_coefficient_matrix.pkl')
    except FileNotFoundError:
        df_cc = correlation_coefficient_matrix()

    # apply corr coeff threshold and create new df
    list_symbols = df_cc.columns.to_list()
    threshold = 0.8  # corr coeff threshold

    list_from = []
    list_to = []
    list_corr_coeff = []
    # The matrix is symmetric and the graph is undirected, so visit each
    # unordered pair once (sym_to always comes after sym_from). This also
    # skips the diagonal, where every symbol correlates 1.0 with itself.
    for idx, sym_from in enumerate(list_symbols):
        for sym_to in list_symbols[idx + 1:]:
            corr_coef = df_cc.loc[sym_from, sym_to]
            if corr_coef > threshold:
                list_from.append(sym_from)
                list_to.append(sym_to)
                list_corr_coeff.append(corr_coef)

    # create df for constructing graph
    df_graph = pd.DataFrame({
        'from': list_from,
        'to': list_to,
        'corr coeff': list_corr_coeff,
    })

    # Keep the correlation coefficient on each edge as the 'corr coeff'
    # attribute. The layouts below do not read this attribute, so the plots
    # are unaffected.
    G = networkx.from_pandas_edgelist(
        df_graph, 'from', 'to', edge_attr='corr coeff'
    )

    # drawing options shared by both plots
    draw_options = {
        'with_labels': True,
        'node_color': 'cornflowerblue',
        'node_size': 600,
        'edge_color': 'black',
        'linewidths': 1,
        'font_size': 10,
        'width': 1,
        'alpha': 0.5,
    }

    # plot the network with the spring layout
    networkx.draw_spring(G, **draw_options)
    plt.savefig('dow_30_corr_coeff_spring.png')

    # clear the figure, then plot the network with the circular layout
    plt.clf()
    networkx.draw_circular(G, **draw_options)
    plt.savefig('dow_30_corr_coeff_circular.png')