AnalyticsDojo

MA, AR, and ARMA

introml.analyticsdojo.com

61. Out-of-Sample Prediction

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 15, 5
import seaborn as sns
#!pip -q install yfinance
import yfinance as yf
sns.set(font_scale=2)
# Download daily price history for Meta (ticker "META") starting 2020-06-01.
FB = yf.Ticker("META")
FB_values = FB.history(start="2020-06-01")

# Hold out the most recent TEST_SIZE rows for testing and train on the rest.
# The split point is validated explicitly: when the history has TEST_SIZE rows
# or fewer, `len(FB_values) - TEST_SIZE` is zero or negative and a plain slice
# would silently wrap around (or yield an empty training set) instead of
# producing the intended hold-out.
TEST_SIZE = 1000
split_at = len(FB_values) - TEST_SIZE
if split_at <= 0:
    raise ValueError(
        f"Need more than {TEST_SIZE} rows to hold out {TEST_SIZE} for testing; "
        f"got {len(FB_values)}."
    )
train_values = FB_values.iloc[:split_at]
test_values = FB_values.iloc[split_at:]

# Plot the closing price for the full series and for each side of the split.
FB_values[['Close']].plot(lw=2,title='Complete')
train_values[['Close']].plot(lw=2,title='Train')
test_values[['Close']].plot(lw=2,title='Test')
<AxesSubplot:title={'center':'Test'}, xlabel='Date'>
../_images/1cbc019a12e76974eabeb395f4a0684791475d08d9273ec6984329de106d74d0.png ../_images/cf0366651bcb09acc24958a459fe961fa1d4903cff5945306fd928dc3ec415e5.png ../_images/ab6a93011e919d30b269af6e4220e4c50e698ef0b2b8fad5728e8518cad73d3c.png
from statsmodels.tsa.arima.model import ARIMA
# Fit an ARIMA model of order (10, 0, 10) to the training closing prices
# and print the estimation summary.
arma_order = (10, 0, 10)
train_close = train_values['Close']
ARMA_model = ARIMA(train_close, order=arma_order)
results = ARMA_model.fit()
print(results.summary())
/opt/anaconda3/lib/python3.8/site-packages/statsmodels/tsa/base/tsa_model.py:581: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  warnings.warn('A date index has been provided, but it has no'
/opt/anaconda3/lib/python3.8/site-packages/statsmodels/tsa/base/tsa_model.py:581: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  warnings.warn('A date index has been provided, but it has no'
/opt/anaconda3/lib/python3.8/site-packages/statsmodels/tsa/base/tsa_model.py:581: ValueWarning: A date index has been provided, but it has no associated frequency information and so will be ignored when e.g. forecasting.
  warnings.warn('A date index has been provided, but it has no'
/opt/anaconda3/lib/python3.8/site-packages/statsmodels/tsa/statespace/sarimax.py:978: UserWarning: Non-invertible starting MA parameters found. Using zeros as starting parameters.
  warn('Non-invertible starting MA parameters found.'
                               SARIMAX Results                                
==============================================================================
Dep. Variable:                  Close   No. Observations:                  216
Model:               ARIMA(10, 0, 10)   Log Likelihood                -681.035
Date:                Wed, 26 Oct 2022   AIC                           1406.071
Time:                        20:01:06   BIC                           1480.327
Sample:                             0   HQIC                          1436.071
                                - 216                                         
Covariance Type:                  opg                                         
==============================================================================
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const        264.4526     28.232      9.367      0.000     209.118     319.787
ar.L1          0.5871      1.666      0.352      0.724      -2.678       3.852
ar.L2          0.8793      0.858      1.024      0.306      -0.803       2.562
ar.L3         -1.2841      1.268     -1.013      0.311      -3.769       1.201
ar.L4          0.4175      2.047      0.204      0.838      -3.595       4.430
ar.L5          0.9243      0.893      1.035      0.301      -0.827       2.675
ar.L6         -1.2504      1.344     -0.930      0.352      -3.885       1.384
ar.L7          0.2804      1.940      0.145      0.885      -3.522       4.082
ar.L8          0.8848      0.699      1.265      0.206      -0.486       2.255
ar.L9         -0.2810      1.278     -0.220      0.826      -2.785       2.223
ar.L10        -0.1738      0.550     -0.316      0.752      -1.251       0.904
ma.L1          0.5007      1.674      0.299      0.765      -2.781       3.783
ma.L2         -0.4955      1.219     -0.407      0.684      -2.884       1.893
ma.L3          0.5515      0.585      0.943      0.346      -0.595       1.698
ma.L4          0.5197      1.248      0.417      0.677      -1.926       2.965
ma.L5         -0.6316      1.142     -0.553      0.580      -2.871       1.607
ma.L6          0.3860      0.726      0.531      0.595      -1.038       1.810
ma.L7          0.5651      0.855      0.661      0.509      -1.111       2.241
ma.L8         -0.4679      1.079     -0.434      0.665      -2.583       1.647
ma.L9         -0.3629      0.618     -0.587      0.557      -1.575       0.849
ma.L10         0.0409      0.399      0.102      0.918      -0.742       0.824
sigma2        30.4260      3.419      8.898      0.000      23.724      37.128
===================================================================================
Ljung-Box (L1) (Q):                   0.18   Jarque-Bera (JB):                 5.18
Prob(Q):                              0.67   Prob(JB):                         0.07
Heteroskedasticity (H):               0.85   Skew:                            -0.09
Prob(H) (two-sided):                  0.49   Kurtosis:                         3.74
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
/opt/anaconda3/lib/python3.8/site-packages/statsmodels/base/model.py:566: ConvergenceWarning: Maximum Likelihood optimization failed to converge. Check mle_retvals
  warnings.warn("Maximum Likelihood optimization failed to "
# evaluate an ARIMA model using a walk-forward validation
from pandas import read_csv
# `pandas.datetime` was deprecated and later removed from pandas; the standard
# library class is the drop-in replacement.
from datetime import datetime
from matplotlib import pyplot
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error
from math import sqrt

# load dataset
FB_values = FB.history(start="2020-06-01")

# split into train and test sets
# Hold out the most recent TEST_SIZE closing prices. The split point is
# validated so that a history with TEST_SIZE rows or fewer fails loudly rather
# than letting a zero/negative slice bound silently produce a different split.
TEST_SIZE = 1000
split_at = len(FB_values) - TEST_SIZE
if split_at <= 0:
    raise ValueError(
        f"Need more than {TEST_SIZE} rows to hold out {TEST_SIZE} for testing; "
        f"got {len(FB_values)}."
    )
train_values = FB_values.iloc[:split_at]['Close']
test_values = FB_values.iloc[split_at:]['Close']

# `history` starts as the training prices and grows by one observed value per
# step; `predictions` collects the one-step-ahead forecasts.
history = list(train_values)
predictions = list()

# walk-forward validation
# Iterate over the test values directly: integer `[]` indexing on a
# date-indexed Series is deprecated positional access in pandas.
for obs in test_values:
    # Refit on everything seen so far, then forecast a single step ahead.
    model = ARIMA(history, order=(1,0,1))
    model_fit = model.fit()
    output = model_fit.forecast(steps=1)
    yhat = output[0]
    predictions.append(yhat)
    # Append the true observation (not the forecast) for the next refit.
    history.append(obs)
    #print('predicted=%f, expected=%f' % (yhat, obs))

# evaluate forecasts
rmse = sqrt(mean_squared_error(test_values, predictions))
print('Test RMSE: %.3f' % rmse)
<ipython-input-4-f5b348368ee8>:3: FutureWarning: The pandas.datetime class is deprecated and will be removed from pandas in a future version. Import from datetime module instead.
  from pandas import datetime
Test RMSE: 7.590
# plot forecasts against actual outcomes
# Wrap both series in DataFrames that share the test index so the two lines
# are drawn against the same x-axis.
test_values = pd.DataFrame(test_values)
predictions = pd.DataFrame(predictions, index=test_values.index)

pyplot.plot(test_values)
pyplot.title("predictions")
pyplot.plot(predictions, color='red')  # forecasts drawn in red
pyplot.show()
../_images/cf257e748c659b62ab39844b5d618de62c978d30e5f444557277e67c2bf7a677.png