Тренд или возврат к среднему (mean reversion): проверка с помощью statsmodels.tsa.stattools
Большая часть кода ниже была собрана из разных источников. Конечная цель — определить, является ли цена закрытия акции возвращающейся к среднему (mean reverting) или трендовой на как можно более коротких интервалах времени. Надеюсь, что этот вопрос и ответ(ы) на него смогут послужить исчерпывающим обзором темы. К сожалению, вывод программы сбивает меня с толку и выглядит противоречивым.
Раздел 1: судя по p-значению, ряд выглядит трендовым. Судя по критическим значениям, он выглядит возвращающимся к среднему.
Раздел 2: везде тренд, за исключением лага 50. Почему при этом лаге получается возврат к среднему?
Раздел 3: везде тренд. Эти p-значения сильно различаются. Как узнать, какой тип регрессии использовать? Нужно ли анализировать исходные данные, чтобы увидеть тренд? Разве смысл теста не в том, чтобы СКАЗАТЬ нам, есть ли тренд?
Раздел 4: везде тренд, кроме (lag, autolag) = (20, t-stat), (30, t-stat), (40, t-stat), (50, AIC). Почему?
Раздел 5: ряд сильно возвращается к среднему. Значения для всех тестовых рядов выглядят близкими к ожидаемым.
Раздел 6: почему получается несколько значений периода полураспада (half-life)? Какое из них правильное?
Благодарю.
вход:
import pandas as pd
import numpy as np
import statsmodels.tsa.stattools as ts # time series library
from statsmodels.tsa.stattools import adfuller
import pandas_datareader.data as wb
import statsmodels.api as sm
from numpy import cumsum, log, polyfit, sqrt, std, subtract
from numpy.random import randn
#............................
# Fetch GE quotes from the 'google' source through pandas_datareader; the
# remaining sections of the script all read the 'Close' column of this frame.
df = wb.DataReader('GE', 'google', '2003, 5, 1', '2003, 12, 15')
print('\n....... section 1 - adf statistic ......................')

# Rebuild the closing prices on a plain positional index before testing.
name = pd.Series(df['Close'].values)
result = adfuller(name)

# adfuller returns a tuple; positions 0, 1 and 4 hold the test statistic,
# the p-value and the dict of critical values respectively.
adf_stat, p_value, critical_values = result[0], result[1], result[4]
print('close - adf statistic: %f' % adf_stat + ', p value: %f' % p_value)

# The script labels a series "trending" when the p-value exceeds 0.05.
verdict = ('trending based on p value > 0.05' if p_value > 0.05
           else 'mean reverting based on p value < 0.05')
print(verdict)

print('critical values:')
for level, threshold in critical_values.items():
    print('\t%s: %.3f' % (level, threshold))
    # Reminder line printed after every critical value.
    print('if adf statistic > % test statistic, then trending at that %')
print('\n..... section 2 - different lag times ...................')

# Lags 1..10 followed by a few coarser steps; section 4 reuses this list.
lag_list = list(range(1, 11)) + [20, 30, 40, 50, 60]

# The series is the same for every lag, so it is built once.
name = pd.Series(df['Close'].values)
for lag in lag_list:
    # NOTE: autolag is left at its default here. Per the statsmodels docs the
    # default is 'AIC', in which case maxlag is only the upper bound of the
    # automatic lag search; the lag actually used is reported in result[2].
    result = adfuller(name, maxlag=lag)
    suffix = (' trending based on lag = ' if result[1] > 0.05
              else ' mean reverting based on lag = ')
    print('p-value: %f' % result[1] + suffix + str(lag))
print('\n..... section 3 - different regression types ..........')

# NOTE(review): newer statsmodels releases appear to spell the no-constant
# option 'n' instead of 'nc' - confirm against the installed version.
regress_list = ['c', 'ct', 'ctt', 'nc']

# Per regression code: (text printed before the code, template printed after).
regress_messages = {
    'c': ('regression for close with ',
          ' = constant only, p-value: %f'),
    'ct': ('\nregression for close with ',
           ' = constant and trend, p-value: %f'),
    'ctt': ('\nregression for close with ',
            ' = constant and linear and quadratic trend, p-value: %f'),
    'nc': ('\nregression for close with ',
           ' = no constant, no trend, p-value: %f'),
}

# The tested series does not depend on the regression type; build it once.
name = pd.Series(df['Close'].values)
for regress in regress_list:
    result = adfuller(name, regression=regress)
    head, tail = regress_messages[regress]
    print(head + regress + tail % result[1])
    verdict = ('trending based on p value > 0.05' if result[1] > 0.05
               else 'mean reverting based on p value < 0.05')
    print(verdict)
print('\n......... section 4 - different lag types')
alag_list = ['AIC', 'BIC', 't-stat']

# Closing prices on a positional index, shared by every test below.
name = pd.Series(df['Close'].values)

# Walk every (maxlag, autolag) combination, lag-major, reusing the lag list
# defined for section 2. With autolag set, maxlag bounds the automatic lag
# search (see the statsmodels adfuller documentation).
lag_pairs = ((max_lag, criterion)
             for max_lag in lag_list
             for criterion in alag_list)
for lag, alag in lag_pairs:
    result = adfuller(name, maxlag=lag, autolag=alag)
    print('\nlag = ' + str(lag) + ', autolag = ' + alag
          + ', p-value = %f' % result[1])
    verdict = ('trending based on p value > 0.05' if result[1] > 0.05
               else 'mean reverting based on p value < 0.05')
    print(verdict)

print('\n........ section 5 - hurst..........................')
def hurst(ts, max_lag=100):
    """Estimate the Hurst exponent of a one-dimensional series.

    For every lag in ``2 .. max_lag - 1`` the spread of the lagged
    differences ``ts[lag:] - ts[:-lag]`` is measured, and a straight line is
    fitted to log(spread) against log(lag).

    Args:
        ts: Sequence of numbers (list, NumPy array or pandas Series).
        max_lag: Exclusive upper bound of the lags to use. The default of 100
            reproduces the original hard-coded range; shorter series are
            handled by clamping the range to what the data supports.

    Returns:
        The estimated Hurst exponent as a float.

    Raises:
        ValueError: If the series is too short to provide at least two lags.
    """
    # Work on a plain array: arithmetic between two pandas Series can align
    # on index labels rather than positions, which would pair each value with
    # itself and make every lagged difference zero.
    values = np.asarray(ts, dtype=float)

    # A lag needs at least two differences for a meaningful spread, so the
    # largest usable lag is len(values) - 2.
    upper = min(max_lag, len(values) - 1)
    if upper < 4:
        raise ValueError(
            'series too short to estimate the Hurst exponent: need at least '
            '5 points and max_lag >= 4'
        )
    lags = np.arange(2, upper)

    # Square root of the standard deviation of the lagged differences.
    tau = [np.sqrt(np.std(values[lag:] - values[:-lag])) for lag in lags]

    # Slope of the log-log fit; because tau is the square root of the
    # standard deviation, the slope is doubled to obtain the exponent.
    slope = np.polyfit(np.log(lags), np.log(tau), 1)[0]
    return slope * 2.0
# Synthetic reference series, each 100000 points long and passed through log:
#   gbm - cumulative sum of standard normal draws, offset by 1000
#   mr  - standard normal draws around 1000, with no accumulation
#   tr  - cumulative sum of normal draws shifted by +1, offset by 1000
gbm = log(cumsum(randn(100000)) + 1000)
mr = log(randn(100000) + 1000)
tr = log(cumsum(randn(100000) + 1) + 1000)

# Report the Hurst exponent of each reference series, then of the ticker's
# closing prices, in that order.
hurst_reports = (
    ("hurst - geometric brownian motion - random walk - should be around 0.50: %s", gbm),
    ("hurst - mean reverting - should be around 0.00): %s", mr),
    ("hurst - trending - should be around 1.00): %s", tr),
    ("hurst: %s", df['Close']),
)
for template, series in hurst_reports:
    print(template % hurst(series))

print('\n....... section 6 - half life ..........................')
def get_halflife(s):
    """Estimate, print and return the half-life of a series.

    The one-step change of the series is regressed on its previous value
    (with an intercept) and the half-life is computed as ``-ln(2) / slope``,
    rounded to a whole number.

    Args:
        s: pandas Series of observations, in order.

    Returns:
        The rounded half-life. It is negative when the fitted slope is
        positive.
    """
    # Previous value of the series; the first entry has no predecessor, so it
    # is filled from the second one. Positional .iloc replaces the .ix
    # accessor, which no longer exists in current pandas.
    s_lag = s.shift(1)
    s_lag.iloc[0] = s_lag.iloc[1]

    # One-step change, with the undefined first entry filled the same way.
    s_ret = s - s_lag
    s_ret.iloc[0] = s_ret.iloc[1]

    # Ordinary least squares of the change on [constant, previous value].
    s_lag2 = sm.add_constant(s_lag)
    res = sm.OLS(s_ret, s_lag2).fit()

    # The constant is placed first, so the slope is the second parameter.
    # Read it by position rather than through label-based indexing.
    slope = np.asarray(res.params)[1]
    halflife = round(-np.log(2) / slope, 0)
    print('half life = ' + str(halflife))
    return halflife
# DataFrame.apply hands each column of df to get_halflife in turn, so one
# "half life" line is printed per column rather than for 'Close' alone.
df.apply(get_halflife, axis=0)
print('\nprogram complete')
вывод:
.................... section 1 - adf statistic ......................
close - adf statistic: -1.959732, p value: 0.304504
trending based on p value > 0.05
critical values:
5%: -2.880
if adf statistic > % test statistic, then trending at that %
10%: -2.577
if adf statistic > % test statistic, then trending at that %
1%: -3.472
if adf statistic > % test statistic, then trending at that %
.................. section 2 - different lag times ...................
p-value: 0.304504 trending based on lag = 1
p-value: 0.304504 trending based on lag = 2
p-value: 0.304504 trending based on lag = 3
p-value: 0.304504 trending based on lag = 4
p-value: 0.304504 trending based on lag = 5
p-value: 0.304504 trending based on lag = 6
p-value: 0.304504 trending based on lag = 7
p-value: 0.304504 trending based on lag = 8
p-value: 0.304504 trending based on lag = 9
p-value: 0.304504 trending based on lag = 10
p-value: 0.304504 trending based on lag = 20
p-value: 0.304504 trending based on lag = 30
p-value: 0.304504 trending based on lag = 40
p-value: 0.009252 mean reverting based on lag = 50
p-value: 0.304504 trending based on lag = 60
.......... section 3 - different regression types ....................
regression for close with c = constant only, p-value: 0.304504
trending based on p value > 0.05
regression for close with ct = constant and trend, p-value: 0.588526
trending based on p value > 0.05
regression for close with ctt = constant and linear and quadratic trend, p-
value: 0.817117
trending based on p value > 0.05
regression for close with nc = no constant, no trend, p-value: 0.739828
trending based on p value > 0.05
........................ section 4 - different lag types
lag = 1, autolag = AIC, p-value = 0.304504
trending based on p value > 0.05
lag = 1, autolag = BIC, p-value = 0.304504
trending based on p value > 0.05
lag = 1, autolag = t-stat, p-value = 0.304504
trending based on p value > 0.05
lag = 2, autolag = AIC, p-value = 0.304504
trending based on p value > 0.05
lag = 2, autolag = BIC, p-value = 0.304504
trending based on p value > 0.05
lag = 2, autolag = t-stat, p-value = 0.304504
trending based on p value > 0.05
lag = 3, autolag = AIC, p-value = 0.304504
trending based on p value > 0.05
lag = 3, autolag = BIC, p-value = 0.304504
trending based on p value > 0.05
lag = 3, autolag = t-stat, p-value = 0.304504
trending based on p value > 0.05
lag = 4, autolag = AIC, p-value = 0.304504
trending based on p value > 0.05
lag = 4, autolag = BIC, p-value = 0.304504
trending based on p value > 0.05
lag = 4, autolag = t-stat, p-value = 0.304504
trending based on p value > 0.05
lag = 5, autolag = AIC, p-value = 0.304504
trending based on p value > 0.05
lag = 5, autolag = BIC, p-value = 0.304504
trending based on p value > 0.05
lag = 5, autolag = t-stat, p-value = 0.304504
trending based on p value > 0.05
lag = 6, autolag = AIC, p-value = 0.304504
trending based on p value > 0.05
lag = 6, autolag = BIC, p-value = 0.304504
trending based on p value > 0.05
lag = 6, autolag = t-stat, p-value = 0.304504
trending based on p value > 0.05
lag = 7, autolag = AIC, p-value = 0.304504
trending based on p value > 0.05
lag = 7, autolag = BIC, p-value = 0.304504
trending based on p value > 0.05
lag = 7, autolag = t-stat, p-value = 0.304504
trending based on p value > 0.05
lag = 8, autolag = AIC, p-value = 0.304504
trending based on p value > 0.05
lag = 8, autolag = BIC, p-value = 0.304504
trending based on p value > 0.05
lag = 8, autolag = t-stat, p-value = 0.304504
trending based on p value > 0.05
lag = 9, autolag = AIC, p-value = 0.304504
trending based on p value > 0.05
lag = 9, autolag = BIC, p-value = 0.304504
trending based on p value > 0.05
lag = 9, autolag = t-stat, p-value = 0.304504
trending based on p value > 0.05
lag = 10, autolag = AIC, p-value = 0.304504
trending based on p value > 0.05
lag = 10, autolag = BIC, p-value = 0.304504
trending based on p value > 0.05
lag = 10, autolag = t-stat, p-value = 0.304504
trending based on p value > 0.05
lag = 20, autolag = AIC, p-value = 0.304504
trending based on p value > 0.05
lag = 20, autolag = BIC, p-value = 0.304504
trending based on p value > 0.05
lag = 20, autolag = t-stat, p-value = 0.046194
mean reverting based on p value < 0.05
lag = 30, autolag = AIC, p-value = 0.304504
trending based on p value > 0.05
lag = 30, autolag = BIC, p-value = 0.304504
trending based on p value > 0.05
lag = 30, autolag = t-stat, p-value = 0.009252
mean reverting based on p value < 0.05
lag = 40, autolag = AIC, p-value = 0.304504
trending based on p value > 0.05
lag = 40, autolag = BIC, p-value = 0.304504
trending based on p value > 0.05
lag = 40, autolag = t-stat, p-value = 0.009252
mean reverting based on p value < 0.05
lag = 50, autolag = AIC, p-value = 0.009252
mean reverting based on p value < 0.05
lag = 50, autolag = BIC, p-value = 0.304504
trending based on p value > 0.05
lag = 50, autolag = t-stat, p-value = 0.335375
trending based on p value > 0.05
lag = 60, autolag = AIC, p-value = 0.304504
trending based on p value > 0.05
lag = 60, autolag = BIC, p-value = 0.304504
trending based on p value > 0.05
lag = 60, autolag = t-stat, p-value = 0.335375
trending based on p value > 0.05
................ section 5 - hurst..........................
hurst - geometric brownian motion - random walk - should be around 0.50: 0.50270119846
hurst - mean reverting - should be around 0.00): 0.000107190651568
hurst - trending - should be around 1.00): 0.95085392982
hurst: 0.114851328923
.................. section 6 - half life ..........................
half life = 15.0
half life = 11.0
half life = 10.0
half life = 14.0
half life = 1.0
program complete