NetworkX is a Python package for the creation, manipulation, and study of the structure, dynamics, and functions of complex networks.

Using daily adjusted close data from 2020-11-18 to 2020-12-18 for the Dow 30 stocks, we compute pairwise correlation coefficients, treat pairs whose coefficient exceeds a threshold of 0.8 as similar stocks, and draw the resulting graph in two layouts with NetworkX.

To compute correlation coefficients, we read in daily adjusted close data and use Pandas to construct a correlation coefficient matrix.

def correlation_coefficient_matrix():
    """Build, save, and return the symbol-by-symbol correlation matrix.

    Reads the symbol list from 'df_dow30.pkl', collects each symbol's
    'Adj_Close' series, and computes the pairwise correlation of those
    series with ``DataFrame.corr()`` (default settings), rounded to three
    decimals.

    Side effects:
        Writes the matrix to 'correlation_coefficient_matrix.csv' and
        'correlation_coefficient_matrix.pkl' in the working directory.

    Returns:
        The rounded correlation DataFrame; its rows and columns are the
        symbols in ascending alphabetical order.

    NOTE(review): the per-symbol data load below is a placeholder
    (``READ DATA``) and is not valid Python; it must be replaced with the
    real loader before this function can run.
    """
    # get symbols
    df = pd.read_pickle('df_dow30.pkl')

    # sort symbols alphabetically (in place), so the matrix rows/columns
    # come out in alphabetical order
    df.sort_values(by='Symbol', ascending=True, inplace=True)
    array_symbols = df['Symbol'].values

    # get data, put into dictionary then dataframe
    dict_sym_ac = {}  # key=symbol, value=array of adj close
    for sym in array_symbols:
        # Placeholder: presumably loads a DataFrame of daily prices for
        # `sym` that has an 'Adj_Close' column -- TODO confirm the source.
        dftemp = READ DATA
        dict_sym_ac[sym] = dftemp['Adj_Close'].values

    # create correlation coeff df: one column per symbol, then pairwise
    # column correlations.
    # NOTE(review): assumes every symbol has the same number of rows,
    # aligned on the same dates -- verify against the data loader.
    dfdata = pd.DataFrame.from_dict(dict_sym_ac)
    dfcc = dfdata.corr().round(3)
    # persist both a human-readable CSV and a pickle of the matrix
    dfcc.to_csv('correlation_coefficient_matrix.csv')
    dfcc.to_pickle('correlation_coefficient_matrix.pkl')

    return dfcc

Dow 30 Correlation Coefficient Matrix

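| | AAPL | AMGN | AXP | BA | CAT | CRM | CSCO | CVX | DIS | DOW | GS | HD | HON | IBM | INTC | JNJ | JPM | KO | MCD | MMM | MRK | MSFT | NKE | PG | TRV | UNH | V | VZ | WBA | WMT |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| AAPL | 1.0 | 0.817 | 0.322 | 0.561 | 0.643 | -0.803 | 0.837 | -0.12 | 0.763 | -0.455 | 0.716 | -0.236 | 0.628 | 0.563 | 0.736 | 0.832 | 0.237 | 0.606 | -0.409 | 0.006 | 0.367 | 0.662 | 0.798 | -0.412 | 0.525 | 0.507 | 0.228 | 0.123 | 0.637 | -0.734 |
| AMGN | 0.817 | 1.0 | 0.239 | 0.552 | 0.669 | -0.76 | 0.704 | -0.112 | 0.641 | -0.364 | 0.604 | -0.474 | 0.633 | 0.419 | 0.653 | 0.855 | 0.085 | 0.693 | -0.59 | -0.105 | 0.469 | 0.351 | 0.668 | -0.529 | 0.489 | 0.607 | 0.1 | 0.178 | 0.708 | -0.807 |
| AXP | 0.322 | 0.239 | 1.0 | 0.834 | 0.482 | -0.621 | 0.639 | 0.696 | 0.171 | -0.044 | 0.656 | -0.318 | 0.663 | 0.804 | 0.754 | 0.331 | 0.901 | 0.318 | -0.412 | 0.056 | 0.614 | 0.28 | 0.618 | -0.474 | 0.065 | 0.657 | 0.649 | 0.778 | 0.733 | -0.096 |
| BA | 0.561 | 0.552 | 0.834 | 1.0 | 0.718 | -0.843 | 0.81 | 0.601 | 0.5 | -0.189 | 0.806 | -0.618 | 0.857 | 0.78 | 0.858 | 0.624 | 0.722 | 0.637 | -0.651 | 0.064 | 0.716 | 0.245 | 0.814 | -0.791 | 0.31 | 0.624 | 0.349 | 0.608 | 0.936 | -0.542 |
| CAT | 0.643 | 0.669 | 0.482 | 0.718 | 1.0 | -0.697 | 0.814 | 0.447 | 0.745 | 0.109 | 0.848 | -0.501 | 0.881 | 0.77 | 0.664 | 0.752 | 0.49 | 0.914 | -0.403 | 0.372 | 0.428 | 0.366 | 0.827 | -0.66 | 0.668 | 0.358 | 0.312 | 0.202 | 0.773 | -0.762 |
| CRM | -0.803 | -0.76 | -0.621 | -0.843 | -0.697 | 1.0 | -0.921 | -0.251 | -0.715 | 0.456 | -0.84 | 0.428 | -0.825 | -0.757 | -0.893 | -0.798 | -0.492 | -0.608 | 0.619 | -0.057 | -0.622 | -0.419 | -0.843 | 0.746 | -0.397 | -0.573 | -0.212 | -0.395 | -0.862 | 0.71 |
| CSCO | 0.837 | 0.704 | 0.639 | 0.81 | 0.814 | -0.921 | 1.0 | 0.307 | 0.787 | -0.251 | 0.926 | -0.295 | 0.861 | 0.879 | 0.856 | 0.824 | 0.586 | 0.683 | -0.433 | 0.235 | 0.509 | 0.613 | 0.905 | -0.637 | 0.505 | 0.5 | 0.417 | 0.338 | 0.812 | -0.686 |
| CVX | -0.12 | -0.112 | 0.696 | 0.601 | 0.447 | -0.251 | 0.307 | 1.0 | 0.129 | 0.42 | 0.525 | -0.286 | 0.592 | 0.605 | 0.383 | 0.052 | 0.792 | 0.323 | -0.193 | 0.432 | 0.37 | -0.054 | 0.371 | -0.528 | 0.215 | 0.097 | 0.286 | 0.375 | 0.433 | -0.031 |
| DIS | 0.763 | 0.641 | 0.171 | 0.5 | 0.745 | -0.715 | 0.787 | 0.129 | 1.0 | -0.188 | 0.767 | -0.162 | 0.748 | 0.593 | 0.583 | 0.72 | 0.249 | 0.701 | -0.209 | 0.39 | 0.155 | 0.596 | 0.732 | -0.553 | 0.667 | 0.081 | 0.022 | -0.193 | 0.539 | -0.824 |
| DOW | -0.455 | -0.364 | -0.044 | -0.189 | 0.109 | 0.456 | -0.251 | 0.42 | -0.188 | 1.0 | -0.031 | 0.105 | -0.056 | 0.02 | -0.447 | -0.232 | 0.218 | 0.165 | 0.396 | 0.551 | -0.31 | -0.123 | -0.221 | 0.229 | 0.365 | -0.301 | 0.143 | -0.033 | -0.217 | 0.208 |
| GS | 0.716 | 0.604 | 0.656 | 0.806 | 0.848 | -0.84 | 0.926 | 0.525 | 0.767 | -0.031 | 1.0 | -0.269 | 0.914 | 0.914 | 0.793 | 0.738 | 0.7 | 0.704 | -0.372 | 0.463 | 0.462 | 0.535 | 0.904 | -0.713 | 0.649 | 0.362 | 0.374 | 0.248 | 0.756 | -0.674 |
| HD | -0.236 | -0.474 | -0.318 | -0.618 | -0.501 | 0.428 | -0.295 | -0.286 | -0.162 | 0.105 | -0.269 | 1.0 | -0.461 | -0.144 | -0.372 | -0.48 | -0.124 | -0.584 | 0.844 | 0.321 | -0.714 | 0.292 | -0.374 | 0.571 | -0.155 | -0.504 | 0.084 | -0.407 | -0.675 | 0.506 |
| HON | 0.628 | 0.633 | 0.663 | 0.857 | 0.881 | -0.825 | 0.861 | 0.592 | 0.748 | -0.056 | 0.914 | -0.461 | 1.0 | 0.853 | 0.801 | 0.716 | 0.652 | 0.789 | -0.495 | 0.374 | 0.571 | 0.381 | 0.876 | -0.79 | 0.537 | 0.368 | 0.303 | 0.292 | 0.832 | -0.664 |
| IBM | 0.563 | 0.419 | 0.804 | 0.78 | 0.77 | -0.757 | 0.879 | 0.605 | 0.593 | 0.02 | 0.914 | -0.144 | 0.853 | 1.0 | 0.795 | 0.552 | 0.838 | 0.562 | -0.225 | 0.459 | 0.426 | 0.554 | 0.828 | -0.578 | 0.407 | 0.393 | 0.599 | 0.431 | 0.705 | -0.405 |
| INTC | 0.736 | 0.653 | 0.754 | 0.858 | 0.664 | -0.893 | 0.856 | 0.383 | 0.583 | -0.447 | 0.793 | -0.372 | 0.801 | 0.795 | 1.0 | 0.637 | 0.618 | 0.537 | -0.527 | 0.007 | 0.61 | 0.461 | 0.835 | -0.708 | 0.253 | 0.626 | 0.355 | 0.455 | 0.845 | -0.524 |
| JNJ | 0.832 | 0.855 | 0.331 | 0.624 | 0.752 | -0.798 | 0.824 | 0.052 | 0.72 | -0.232 | 0.738 | -0.48 | 0.716 | 0.552 | 0.637 | 1.0 | 0.193 | 0.754 | -0.654 | 0.071 | 0.603 | 0.381 | 0.779 | -0.548 | 0.585 | 0.496 | 0.133 | 0.21 | 0.737 | -0.774 |
| JPM | 0.237 | 0.085 | 0.901 | 0.722 | 0.49 | -0.492 | 0.586 | 0.792 | 0.249 | 0.218 | 0.7 | -0.124 | 0.652 | 0.838 | 0.618 | 0.193 | 1.0 | 0.327 | -0.131 | 0.365 | 0.364 | 0.394 | 0.581 | -0.399 | 0.247 | 0.419 | 0.638 | 0.604 | 0.561 | -0.082 |
| KO | 0.606 | 0.693 | 0.318 | 0.637 | 0.914 | -0.608 | 0.683 | 0.323 | 0.701 | 0.165 | 0.704 | -0.584 | 0.789 | 0.562 | 0.537 | 0.754 | 0.327 | 1.0 | -0.431 | 0.302 | 0.364 | 0.28 | 0.701 | -0.595 | 0.741 | 0.328 | 0.135 | 0.169 | 0.757 | -0.779 |
| MCD | -0.409 | -0.59 | -0.412 | -0.651 | -0.403 | 0.619 | -0.433 | -0.193 | -0.209 | 0.396 | -0.372 | 0.844 | -0.495 | -0.225 | -0.527 | -0.654 | -0.131 | -0.431 | 1.0 | 0.367 | -0.883 | 0.17 | -0.463 | 0.603 | -0.079 | -0.615 | 0.085 | -0.464 | -0.697 | 0.472 |
| MMM | 0.006 | -0.105 | 0.056 | 0.064 | 0.372 | -0.057 | 0.235 | 0.432 | 0.39 | 0.551 | 0.463 | 0.321 | 0.374 | 0.459 | 0.007 | 0.071 | 0.365 | 0.302 | 0.367 | 1.0 | -0.292 | 0.326 | 0.299 | -0.171 | 0.559 | -0.459 | 0.088 | -0.269 | -0.01 | -0.157 |
| MRK | 0.367 | 0.469 | 0.614 | 0.716 | 0.428 | -0.622 | 0.509 | 0.37 | 0.155 | -0.31 | 0.462 | -0.714 | 0.571 | 0.426 | 0.61 | 0.603 | 0.364 | 0.364 | -0.883 | -0.292 | 1.0 | -0.155 | 0.546 | -0.612 | -0.019 | 0.606 | 0.131 | 0.603 | 0.671 | -0.276 |
| MSFT | 0.662 | 0.351 | 0.28 | 0.245 | 0.366 | -0.419 | 0.613 | -0.054 | 0.596 | -0.123 | 0.535 | 0.292 | 0.381 | 0.554 | 0.461 | 0.381 | 0.394 | 0.28 | 0.17 | 0.326 | -0.155 | 1.0 | 0.515 | 0.057 | 0.382 | 0.218 | 0.536 | -0.025 | 0.258 | -0.307 |
| NKE | 0.798 | 0.668 | 0.618 | 0.814 | 0.827 | -0.843 | 0.905 | 0.371 | 0.732 | -0.221 | 0.904 | -0.374 | 0.876 | 0.828 | 0.835 | 0.779 | 0.581 | 0.701 | -0.463 | 0.299 | 0.546 | 0.515 | 1.0 | -0.646 | 0.533 | 0.467 | 0.386 | 0.299 | 0.798 | -0.687 |
| PG | -0.412 | -0.529 | -0.474 | -0.791 | -0.66 | 0.746 | -0.637 | -0.528 | -0.553 | 0.229 | -0.713 | 0.571 | -0.79 | -0.578 | -0.708 | -0.548 | -0.399 | -0.595 | 0.603 | -0.171 | -0.612 | 0.057 | -0.646 | 1.0 | -0.344 | -0.227 | 0.146 | -0.187 | -0.737 | 0.645 |
| TRV | 0.525 | 0.489 | 0.065 | 0.31 | 0.668 | -0.397 | 0.505 | 0.215 | 0.667 | 0.365 | 0.649 | -0.155 | 0.537 | 0.407 | 0.253 | 0.585 | 0.247 | 0.741 | -0.079 | 0.559 | -0.019 | 0.382 | 0.533 | -0.344 | 1.0 | 0.037 | -0.011 | -0.167 | 0.392 | -0.715 |
| UNH | 0.507 | 0.607 | 0.657 | 0.624 | 0.358 | -0.573 | 0.5 | 0.097 | 0.081 | -0.301 | 0.362 | -0.504 | 0.368 | 0.393 | 0.626 | 0.496 | 0.419 | 0.328 | -0.615 | -0.459 | 0.606 | 0.218 | 0.467 | -0.227 | 0.037 | 1.0 | 0.478 | 0.795 | 0.705 | -0.283 |
| V | 0.228 | 0.1 | 0.649 | 0.349 | 0.312 | -0.212 | 0.417 | 0.286 | 0.022 | 0.143 | 0.374 | 0.084 | 0.303 | 0.599 | 0.355 | 0.133 | 0.638 | 0.135 | 0.085 | 0.088 | 0.131 | 0.536 | 0.386 | 0.146 | -0.011 | 0.478 | 1.0 | 0.499 | 0.309 | 0.127 |
| VZ | 0.123 | 0.178 | 0.778 | 0.608 | 0.202 | -0.395 | 0.338 | 0.375 | -0.193 | -0.033 | 0.248 | -0.407 | 0.292 | 0.431 | 0.455 | 0.21 | 0.604 | 0.169 | -0.464 | -0.269 | 0.603 | -0.025 | 0.299 | -0.187 | -0.167 | 0.795 | 0.499 | 1.0 | 0.582 | 0.094 |
| WBA | 0.637 | 0.708 | 0.733 | 0.936 | 0.773 | -0.862 | 0.812 | 0.433 | 0.539 | -0.217 | 0.756 | -0.675 | 0.832 | 0.705 | 0.845 | 0.737 | 0.561 | 0.757 | -0.697 | -0.01 | 0.671 | 0.258 | 0.798 | -0.737 | 0.392 | 0.705 | 0.309 | 0.582 | 1.0 | -0.657 |
| WMT | -0.734 | -0.807 | -0.096 | -0.542 | -0.762 | 0.71 | -0.686 | -0.031 | -0.824 | 0.208 | -0.674 | 0.506 | -0.664 | -0.405 | -0.524 | -0.774 | -0.082 | -0.779 | 0.472 | -0.157 | -0.276 | -0.307 | -0.687 | 0.645 | -0.715 | -0.283 | 0.127 | 0.094 | -0.657 | 1.0 |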

We then create a new DataFrame for use in NetworkX. It has three columns: ‘from’ and ‘to’ hold the stock symbols (graph nodes) at the two ends of each undirected edge, and ‘corr coeff’ holds that edge's weight (the correlation coefficient). Only pairs with a correlation coefficient > 0.8 are included.

We draw the resulting graph in two layouts, circular and spring, and save each plot. As we are only interested in displaying correlation coefficient information, the choice of layouts was dictated by a desire to show similar stocks and clusters of such stocks.

import pandas as pd
import networkx
import matplotlib.pyplot as plt

if __name__ == '__main__':
    # Script entry point: load (or build) the correlation matrix, keep the
    # symbol pairs above the threshold as graph edges, and save the graph
    # drawn with a spring layout and with a circular layout.

    # Load the cached matrix; rebuild it only when the cache file is missing.
    # Catching FileNotFoundError (instead of a bare ``except:``) keeps
    # KeyboardInterrupt and genuine errors, e.g. a corrupt pickle, visible.
    try:
        df_cc = pd.read_pickle('correlation_coefficient_matrix.pkl')
    except FileNotFoundError:
        df_cc = correlation_coefficient_matrix()

    # apply corr coeff threshold and collect the qualifying edges
    list_symbols = df_cc.columns.to_list()
    threshold = 0.8  # corr coeff threshold
    list_from = []
    list_to = []
    list_corr_coeff = []
    # A correlation matrix is symmetric, so walking only the pairs above the
    # diagonal records each undirected edge once (and skips self-pairs).
    for idx, sym_from in enumerate(list_symbols):
        for sym_to in list_symbols[idx + 1:]:
            corr_coef = df_cc.loc[sym_from, sym_to]
            if corr_coef > threshold:
                list_from.append(sym_from)
                list_to.append(sym_to)
                list_corr_coeff.append(corr_coef)

    # create df for constructing graph: one row per undirected edge
    df_graph = pd.DataFrame({'from': list_from, 'to': list_to,
                             'corr coeff': list_corr_coeff})

    # Build the undirected graph; 'corr coeff' is attached to every edge as
    # an attribute so the weights are available on G (the plots below do not
    # use them).
    G = networkx.from_pandas_edgelist(df_graph, 'from', 'to',
                                      edge_attr='corr coeff')

    # Shared styling so both layouts are drawn identically.
    draw_kwargs = dict(with_labels=True, node_color='cornflowerblue',
                       node_size=600, edge_color='black', linewidths=1,
                       font_size=10, width=1, alpha=0.5)

    # plot the network with a spring layout
    networkx.draw_spring(G, **draw_kwargs)
    plt.savefig('dow_30_corr_coeff_spring.png')

    # clear the figure, then plot the network with a circular layout
    plt.clf()
    networkx.draw_circular(G, **draw_kwargs)
    plt.savefig('dow_30_corr_coeff_circular.png')

Circular Graph

Spring Graph