Various Ways of Stock Data Analysis
In this post, we will try out various ways of analyzing stock data. This tutorial was offered in the Finance AI lecture at Chung-Ang University.
We will use the FinanceDataReader package to fetch stock data, and compare it with pandas_datareader, which is widely used for remote data access.
import FinanceDataReader as fdr
import pandas_datareader as pdr
import math
import numpy as np
import pandas as pd
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns
# Global plot appearance: FiveThirtyEight theme, then a 16x10 default figure size.
# The size is set after the theme so the theme cannot override it.
plt.style.use('fivethirtyeight')
plt.rcParams.update({'figure.figsize': (16, 10)})
Here, we will look at some trends in stock data, focusing on the KOSPI (Korea Composite Stock Price Index) from 2000-01-01 to today. Additionally, we will look for trends in other stock indices.
# Sample window: from 2000-01-01 up to the moment the script runs.
end = datetime.today()
start = datetime(2000, 1, 1)


def _yahoo(ticker):
    """Fetch ``ticker`` from the 'yahoo' data source for the start..end window."""
    # NOTE(review): relies on pandas_datareader's 'yahoo' reader — confirm it
    # still works with the installed pandas_datareader version.
    return pdr.DataReader(ticker, 'yahoo', start, end)


# Korean market: index plus two large caps. Samsung is fetched from both
# packages so their outputs can be compared.
kospi = _yahoo('^KS11')                      # KOSPI
ss_yh = _yahoo('005930.KS')                  # YAHOO: Samsung Electronics Co., Ltd.
ss_f = fdr.DataReader('005930', start, end)  # FDR: Samsung Electronics Co., Ltd.
hm = _yahoo('005380.KS')                     # Hyundai Motor Company

# Other indices used for comparison.
snp = _yahoo('^GSPC')       # S&P500
nikkei = _yahoo('^N225')    # Nikkei 225
euronext = _yahoo('^N100')  # EURONEXT 100
vix = _yahoo('^VIX')        # VIX

# Preview the first rows of the KOSPI frame and of both Samsung frames.
kospi.head()
ss_yh.head()
ss_f.head()
As you can see, the information provided by each package is different. FinanceDataReader
shows the trading volume and its daily change. In contrast, the Yahoo Finance data shows the total volume in US dollars and the Adjusted Close value. We will not cover the meaning of each column in detail, but we will use the Close value for stock analysis.
# Samsung (Yahoo data): 'Close' in the left panel, 'Adj Close' in the right.
ss_yh_close = ss_yh[['Close']]
ss_yh_aclose = ss_yh[['Adj Close']]
for _panel, _prices in enumerate((ss_yh_close, ss_yh_aclose), start=1):
    plt.subplot(1, 2, _panel)
    plt.plot(_prices)
plt.show()
# Use the Yahoo frame as the Samsung source from here on.
ss = ss_yh

# (column label, source frame, source column) for every series in the panel.
# NOTE(review): 'ss' is taken from 'Close' while every other series uses
# 'Adj Close' — confirm this is intentional.
_sources = [
    ('SP500', snp, 'Adj Close'),
    ('KOSPI', kospi, 'Adj Close'),
    ('ss', ss, 'Close'),
    ('hm', hm, 'Adj Close'),
    ('nikkei', nikkei, 'Adj Close'),
    ('euronext', euronext, 'Adj Close'),
    ('VIX', vix, 'Adj Close'),
]

# Align all series on their index and keep only rows where every series has a value.
eqt = pd.DataFrame(
    {label: frame[column] for label, frame, column in _sources}
).dropna()
eqt.head()
We can compute the rate of return (log return, in percent) for each date.
# Row-over-row log returns in percent: 100 * (ln P_t - ln P_{t-1}).
_log_prices = np.log(eqt)
rtn = _log_prices.diff().mul(100)

# Return columns are renamed positionally, in the same order as eqt's columns.
rtn.columns = ['r_snp', 'r_ko', 'r_ss', 'r_hm', 'r_nk', 'r_ux', 'r_vx']

# The first row has no predecessor, so its returns are NaN; drop it.
rtn = rtn.dropna()
rtn.head()

rtn.plot(fontsize=16)
plt.show()
# Raw price levels of every series on a single axis.
plt.plot(eqt)
plt.show()

# Prices and returns side by side, keeping only rows present in both frames.
pr = eqt.merge(rtn, how='inner', left_index=True, right_index=True)
pr.head()

# Cumulative change of each series relative to its first row (0.0 = unchanged).
_first_row = eqt.iloc[0]
pr_0 = eqt.div(_first_row).sub(1.0)
pr_0.plot(fontsize=18)
plt.show()

# KOSPI level and KOSPI return, one subplot each.
_kospi_view = pr[['KOSPI', 'r_ko']]
_kospi_view.plot(subplots=True, fontsize=16)
plt.show()
# Three stacked panels: KOSPI level, VIX level and KOSPI return.
fig, axs = plt.subplots(nrows=3, ncols=1, figsize=(15, 10), constrained_layout=True)
fig.suptitle('KOSPI, VIX, and Return', fontsize=18)
_ax_kospi, _ax_vix, _ax_return = axs

# Top panel: KOSPI price level (the only panel with a legend).
_ax_kospi.plot(pr.index, pr['KOSPI'], 'g-', label='KOSPI')
_ax_kospi.set_title('KOSPI')
_ax_kospi.legend(['KOSPI'])

# Middle panel: VIX level.
_ax_vix.plot(pr.index, pr['VIX'], 'r-')
_ax_vix.set_title('VIX')

# Bottom panel: KOSPI return.
_ax_return.plot(pr.index, pr['r_ko'], 'b-')
_ax_return.set_title('KOSPI Return')

plt.show()
def _describe_with_moments(frame):
    """Return ``frame.describe()`` extended with 'var', 'skew' and 'kurt' rows.

    Args:
        frame: DataFrame to summarise.

    Returns:
        The ``describe()`` table with three extra rows holding the per-column
        variance, skewness and kurtosis.
    """
    summary = frame.describe()
    # The extra rows are assigned positionally (as lists), so they follow the
    # column order of ``frame``.
    summary.loc['var'] = frame.var().tolist()
    summary.loc['skew'] = frame.skew().tolist()
    summary.loc['kurt'] = frame.kurtosis().tolist()
    return summary


# Summary statistics for the combined price/return frame.
df = pr
stats = _describe_with_moments(df)
print(stats)

# Summary statistics for the returns only.
df = rtn
stats = _describe_with_moments(df)
print(stats)
# Price distributions of the S&P500 and the KOSPI, with a KDE overlay.
# stat='probability' normalises the bars so the y-axis really is a relative
# frequency; histplot's default would plot raw counts.
sns.histplot(pr['SP500'], label='S&P500', color='red', kde=True, stat='probability')
sns.histplot(pr['KOSPI'], label='KOSPI', color='g', kde=True, stat='probability')
plt.xlabel('Price')
plt.ylabel('Relative Frequency')
plt.legend()
plt.show()
# Count histograms (no KDE overlay) of the S&P500 and KOSPI prices.
sns.histplot(pr['SP500'], kde=False, label='S&P500', color='r')
sns.histplot(pr['KOSPI'], kde=False, label='KOSPI', color='g')

# Plot formatting
plt.title('Frequency: Price')
plt.xlabel('Price')
plt.ylabel('Frequency')
plt.legend()
plt.show()
# Kernel density estimates of the S&P500 and KOSPI prices.
sns.kdeplot(pr['SP500'], label='S&P500', color='r')
sns.kdeplot(pr['KOSPI'], label='KOSPI', color='g')

# Plot formatting. The title names what is drawn: a density estimate, not a histogram.
plt.title('Density: Price')
plt.xlabel('Price')
plt.ylabel('Density')
plt.legend()
plt.show()
# Filled kernel density estimates of the S&P500 and KOSPI returns.
sns.kdeplot(pr['r_snp'], label='S&P500', color='r', fill=True)
sns.kdeplot(pr['r_ko'], label='KOSPI', color='g', fill=True)
plt.xlabel('Return')
# A KDE curve is a density, so the axis is labelled accordingly.
plt.ylabel('Density')
plt.legend()
plt.show()
# Pairwise correlation matrix of the combined price/return frame.
# The result is not assigned or printed; it is only visible where bare
# expressions are echoed (e.g. a notebook cell).
pr.corr()
# Pairwise correlation matrix of the returns only (likewise not stored).
rtn.corr()
# One annotated correlation heatmap per frame, each shown as its own figure:
# combined prices and returns, prices only, returns only.
for _frame in (pr, eqt, rtn):
    sns.heatmap(_frame.corr(), annot=True, cmap='inferno')
    plt.show()
# One pairwise scatter-plot grid per frame, each shown as its own figure:
# combined prices and returns, prices only, returns only.
for _frame in (pr, eqt, rtn):
    sns.pairplot(_frame)
    plt.show()