Pkg.add()
# Pkg.add("EconDatasets")
# Pkg.add("TimeData")
using EconDatasets
using TimeData
using Dates
using Gadfly
using Distributions
using JuMP
using NLopt
using Base.Test
Warning: New definition get(Any,Colon) at /home/jovyan/.julia/v0.3/TimeData/src/abstractFuncs.jl:77 is ambiguous with: get(Nullable{T},Any) at /home/jovyan/.julia/v0.3/Compat/src/nullable.jl:39. To fix, define get(Nullable{T},Colon) before the new definition. Warning: New definition get(Any,Colon) at /home/jovyan/.julia/v0.3/TimeData/src/abstractFuncs.jl:77 is ambiguous with: get(OrderedDict{K,V},Any...) at /home/jovyan/.julia/v0.3/DataStructures/src/delegate.jl:11. To fix, define get(OrderedDict{K,V},Colon) before the new definition. Warning: New definition get(Any,Colon) at /home/jovyan/.julia/v0.3/TimeData/src/abstractFuncs.jl:77 is ambiguous with: get(DefaultDictBase{K,V,F,D},Any...) at /home/jovyan/.julia/v0.3/DataStructures/src/delegate.jl:11. To fix, define get(DefaultDictBase{K,V,F,D},Colon) before the new definition. Warning: New definition get(Any,Colon) at /home/jovyan/.julia/v0.3/TimeData/src/abstractFuncs.jl:77 is ambiguous with: get(DefaultDict{K,V,F},Any...) at /home/jovyan/.julia/v0.3/DataStructures/src/delegate.jl:11. To fix, define get(DefaultDict{K,V,F},Colon) before the new definition. Warning: New definition get(Any,Colon) at /home/jovyan/.julia/v0.3/TimeData/src/abstractFuncs.jl:77 is ambiguous with: get(DefaultOrderedDict{K,V,F},Any...) at /home/jovyan/.julia/v0.3/DataStructures/src/delegate.jl:11. To fix, define get(DefaultOrderedDict{K,V,F},Colon) before the new definition.
tickerSymbs = ["^NDX"
"^GSPC"
"^STOXX50E"]
dates = Date(1960,1,1):Date(2015,3,30)
indexData = [readYahooFinance(dates, symb) for symb in tickerSymbs];
% Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 519k 0 519k 0 0 505k 0 --:--:-- 0:00:01 --:--:-- 506k % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 964k 0 964k 0 0 993k 0 --:--:-- --:--:-- --:--:-- 993k % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 385k 0 385k 0 0 536k 0 --:--:-- --:--:-- --:--:-- 536k
## display first five dates of NASDAQ
indexData[2][1:5, :]
Timematr{Date}
Dimensions: (5, 6)
From: 1950-01-03, To: 1950-01-09
idx | Open | High | Low | Close | Volume | Adj_Close | |
---|---|---|---|---|---|---|---|
1 | 1950-01-03 | 16.66 | 16.66 | 16.66 | 16.66 | 1.26e6 | 16.66 |
2 | 1950-01-04 | 16.85 | 16.85 | 16.85 | 16.85 | 1.89e6 | 16.85 |
3 | 1950-01-05 | 16.93 | 16.93 | 16.93 | 16.93 | 2.55e6 | 16.93 |
4 | 1950-01-06 | 16.98 | 16.98 | 16.98 | 16.98 | 2.01e6 | 16.98 |
5 | 1950-01-09 | 17.08 | 17.08 | 17.08 | 17.08 | 2.52e6 | 17.08 |
lookUpIndices = {:nasdaq => 1, :sp500 => 2, :stoxx => 3}
Dict{Any,Any} with 3 entries: :nasdaq => 1 :sp500 => 2 :stoxx => 3
chosenIndex = :nasdaq
:nasdaq
indexPriceData = indexData[lookUpIndices[chosenIndex]][:Adj_Close]
rename!(indexPriceData.vals, :Adj_Close, chosenIndex)
indexPriceData[1:5, :]
Timematr{Date}
Dimensions: (5, 1)
From: 1985-10-01, To: 1985-10-07
idx | nasdaq | |
---|---|---|
1 | 1985-10-01 | 112.14 |
2 | 1985-10-02 | 110.825 |
3 | 1985-10-03 | 110.87 |
4 | 1985-10-04 | 110.075 |
5 | 1985-10-07 | 108.2 |
TimeData
instances:loadPlotting()
wstHist (generic function with 4 methods)
gdfPlot(indexPriceData)
gdfPlot(log(indexPriceData))
function calcDiscRet(tm::Timematr)
return 100*(tm[2:end, :] .- tm[1:(end-1), :])./tm[1:(end-1), :]
end
function calcLogRet(tm::Timematr)
return 100*(log(tm[2:end, :]) .- log(tm[1:(end-1), :]))
end
calcLogRet (generic function with 1 method)
indexRetData = calcLogRet(indexPriceData)
indexRetData[1:5; :]
Timematr{Date}
Dimensions: (5, 1)
From: 1985-10-02, To: 1985-10-08
idx | nasdaq | |
---|---|---|
1 | 1985-10-02 | -1.18 |
2 | 1985-10-03 | 0.041 |
3 | 1985-10-04 | -0.72 |
4 | 1985-10-07 | -1.718 |
5 | 1985-10-08 | -0.966 |
gdfPlot(indexRetData)
logRets = asArr(indexRetData, Float64)[:];
plot(x=logRets, Geom.histogram(bincount = 100))
pdf:
$$ \begin{equation*} f(x_{i};\mu,\sigma)=\frac{1}{\sqrt{2 \pi \sigma^{2}}}\exp\left(-\frac{(x_{i}-\mu)^{2}}{2\sigma^{2}}\right) \end{equation*} $$function norm_negLlh(x::Array{Float64, 1}, mu::Float64, sigma::Float64)
llhs = -0.5*log(sigma^2*2*pi)-0.5*(x-mu).^2/sigma^2
return return -sum(llhs)
end
norm_negLlh (generic function with 1 method)
normDist = Normal(0., 1.0)
@test_approx_eq -sum(log(pdf(normDist, logRets))) norm_negLlh(logRets, 0., 1.)
count = 0 # keep track of number of function evaluations
function objFunc(x::Vector, grad::Vector)
if length(grad) > 0
end
global count
count::Int += 1
# println("f_$count($x)")
mu = x[1]
sigma = x[2]
return norm_negLlh(logRets, mu, sigma)
end
objFunc (generic function with 1 method)
opt = Opt(:LN_COBYLA, 2)
lower_bounds!(opt, [-Inf, 0.])
xtol_rel!(opt,1e-8)
min_objective!(opt, objFunc)
(minf,minx,ret) = optimize(opt, [0.0, 1.0])
println("got $minf at $minx after $count iterations (returned $ret)")
got 14526.376576784487 at [0.04915571949192298,1.7076314260696241] after 67 iterations (returned XTOL_REACHED)
normFit = fit(Normal, logRets)
Normal(μ=0.0491557033633773, σ=1.7076314274539048)
muHat, sigmaHat = minx
muHat_builtin, sigmaHat_builtin = params(normFit)
@test_approx_eq_eps muHat muHat_builtin 0.0000001
@test_approx_eq_eps sigmaHat sigmaHat_builtin 0.0000001
function ecdfVals(x::Array{Float64, 1})
xVals = sort(x)
nObs = size(x, 1)
yVals = [1:nObs]./(1+nObs)
return (xVals, yVals)
end
xVals, yVals = ecdfVals(logRets);
# calculate values of normal cdf
nCdfVals = cdf(normFit, xVals)
plot(layer(x=xVals, y=nCdfVals, Geom.step),
layer(x=xVals, y=yVals, Geom.line, Theme(default_color=color("red"))))
normVals = quantile(normFit, yVals)
plot(layer(x=xVals, y=normVals, Geom.point,Theme(default_point_size=0.6mm)),
layer(x=[minimum(xVals), maximum(xVals)], y=[minimum(xVals), maximum(xVals)], Geom.line))
alpha = 0.98
logLosses = -logRets
varHS = quantile(logLosses, alpha)
3.8419575995586728
function empES(x::Array{Float64, 1}, alphas::Array{Float64, 1})
nVars = length(alphas)
varHS = quantile(x, alphas)
esHS = Array(Float64, nVars)
for ii=1:nVars
exceedances = x[x .> varHS[ii]]
esHS[ii] = mean(exceedances)
end
return esHS
end
empES(logLosses, [alpha])
1-element Array{Float64,1}: 5.42294
muL = -muHat
sigma = sigmaHat
varNorm = muL + sigma*quantile(Normal(), alpha)
3.4578904615592325
esNorm = muL + sigma*(pdf(Normal(), quantile(Normal(), alpha))/(1-alpha))
4.084860801591298
varNorm = quantile(normFit, alpha)
(varHS, varNorm)
(3.8419575995586728,3.5562018872574974)
hitsNorm = sum(logLosses .> varNorm)./size(logLosses, 1)
println("Instead of $(1-alpha) exceedances, we get:")
println(" using normality: $hitsNorm")
Instead of 0.020000000000000018 exceedances, we get: using normality: 0.026365348399246705
exceedInd = asArr(-indexRetData .> varNorm, Bool, false)[:]
exceedances = indexRetData[exceedInd, :]
noExceedances = indexRetData[!exceedInd, :]
plot(layer(x=idx(noExceedances), y=asArr(noExceedances, Float64),
Geom.point, Theme(default_point_size=0.5mm)),
layer(x=idx(exceedances), y=asArr(exceedances, Float64),
Geom.point, Theme(default_color=color("red"), default_point_size=0.5mm)))
backtesting VaR-calculations based on assumption of independent normally distributed losses generally leads to two patterns:
the results of the backtesting procedure indicate substantial model risk involved in the framework of assumed normally distributed losses
hitsHS = sum(logLosses .> varHS)./size(logLosses, 1)
println("Instead of $(1-alpha) exceedances, we get:")
println(" historical simulation: $hitsHS")
Instead of 0.020000000000000018 exceedances, we get: historical simulation: 0.02004304546677428
$\Rightarrow$ overfitting
$\Rightarrow$ is historical simulation the best VaR estimator?
Simulation study:
distr = TDist(3)
alphas = [0.99, 0.995, 0.999]
nReps = 100000
nObs = 2500
# preallocation
simVals = Array(Float64, nObs)
simVar = Array(Float64, nReps, length(alphas))
@time begin
for ii=1:nReps
simVals = rand(distr, nObs)
simVar[ii, :] = quantile(simVals, alphas)
end
end
elapsed time: 56.254995155 seconds (5627688420 bytes allocated, 14.34% gc time)
ps = [plot(x=simVar[:, ii][:], Geom.histogram(bincount = 100)) for ii=1:length(alphas)]
vstack(ps...)
trueVars = quantile(distr, alphas)
mserrors = Float64[mean((simVar[:, ii] - trueVars[ii]).^2) for ii=1:length(alphas)]
3-element Array{Float64,1}: 0.112282 0.334593 4.19842
versioninfo()
Julia Version 0.3.6 Commit a05f87b* (2015-01-08 22:33 UTC) Platform Info: System: Linux (x86_64-linux-gnu) CPU: Intel(R) Core(TM) i5-4210U CPU @ 1.70GHz WORD_SIZE: 64 BLAS: libopenblas (DYNAMIC_ARCH NO_AFFINITY Haswell) LAPACK: libopenblas LIBM: libopenlibm LLVM: libLLVM-3.3
Pkg.status()
19 required packages: - DataArrays 0.2.13 - DataFrames 0.6.3 - Dates 0.3.2 - Debug 0.1.2 - Distributions 0.7.0 - EconDatasets 0.0.2 - GLM 0.4.5 - Gadfly 0.3.11 - IJulia 0.2.3 - JuMP 0.9.0 - MAT 0.2.11 - NLopt 0.2.0 - Plotly 0.0.3+ master - Quandl 0.4.1 - RDatasets 0.1.2 - Taro 0.1.4 - TimeData 0.5.1 - TimeSeries 0.5.7 - Winston 0.11.9 57 additional packages: - ArrayViews 0.6.1 - BinDeps 0.3.11 - Blosc 0.1.2 - Cairo 0.2.26 - Calculus 0.1.6 - Codecs 0.1.3 - Color 0.4.4 - Compat 0.4.1 - Compose 0.3.11 - Contour 0.0.6 - Copulas 0.0.0- master (unregistered) - DataStructures 0.3.8 - Distances 0.2.0 - Docile 0.4.10 - DualNumbers 0.1.3 - Econometrics 0.0.0- master (unregistered) - FixedPointNumbers 0.0.7 - GZip 0.2.15 - GnuTLS 0.0.4 - Graphics 0.1.0 - Grid 0.3.8 - Gumbo 0.1.2 - HDF5 0.4.13 - HTTPClient 0.1.4 - Hexagons 0.0.2 - HttpCommon 0.0.12 - HttpParser 0.0.11 - ImmutableArrays 0.0.6 - IniFile 0.2.4 - Iterators 0.1.7 - JSON 0.4.2 - JavaCall 0.2.2 - KernelDensity 0.1.0 b2c9f7d6 (dirty) - LibCURL 0.1.4 - Loess 0.0.3 - MathProgBase 0.3.11 - Memoize 0.0.0 - NaNMath 0.0.2 - Nettle 0.1.8 - NumericFuns 0.2.3 - Optim 0.4.1 - PDMats 0.3.3 - REPLCompletions 0.0.3 - Reexport 0.0.2 - Requests 0.0.8 - Requires 0.1.1 - ReverseDiffSparse 0.2.7 - SHA 0.0.4 - Showoff 0.0.4 - SortingAlgorithms 0.0.4 - StatsBase 0.6.14 - Tk 0.3.1 - URIParser 0.0.5 - WoodburyMatrices 0.0.1 - WorldBankDataTd 0.0.0- master (unregistered) - ZMQ 0.1.17 - Zlib 0.1.8