/* vi1a2.prg Table 1 Panel A */ /* */ /* december, 2008 */ /* */ /* volint paper with Corradi, Distaso and Swanson */ /* */ /* Simulation exercise program */ /* Simulation and Realized Measure Based Conditional Interval p-values*/ /* no noise or jumps */ /* */ format /MA1/RD 12,3; outwidth 255; output file = c:\vi1a2.out reset; output on; /* set up optimization routine */ /* library optmum; #include optmum.ext; optset; _opstmth = "bfgs stepbt nohess"; _opmdmth = "nohess"; _opgtol = 1e-1; _opmiter=200; __output = 0; */ /* Walter settings */ /* diffusion paper settings */ /* library optmum; _opstmth = "bfgs stepbt nohess"; __output=0; _opmdmth = "nohess"; _opmiter=200; _opgtol = 1e-6; __output = 2; */ /* set parameters of simulation */ simtot=1001; /* number of simulations */ M_max=576; /* 720 this is max intraday obs used in this prg */ M_tot=4; /* # diff't intraday variations to consider */ T_tot=2; /* # diff't daily sample sizes to consider */ hh=1; /* # of data simulated between each observed value */ num=0.125; /* # of IV SEs around mean of IV to build conf intvls */ SS_in1=3000; /* # of length 2 paths for pseudo true interval */ SS_in2=500; /* # of simulated paths of length 3 for sim based intvl */ ktype=2; /* if=1, use epanichenkov, if=2, use quartic (biweight) */ /* /* This next command used to generate the following initial value for sig2. */ /* This form of the gamm is an Erlang, as the second parameter is fixed, and */ /* hence a random draw can be taken from a 1 parm gamma x the 2nd parm */ /* sig2_ini=(eta^2/(2*kapp))*rndgam(1,1,((2*kapp*mu)/eta^2)); */ sig2_ini=6.22838923; xdat_ini=mm; kapp=1; mu=1; eta=1; mm=0.05; /* mm value does not affect sig2_ini if use mm=0.01 here */ */ /* sig2_ini=(eta^2/(2*kapp))*rndgam(1,1,((2*kapp*mu)/eta^2)); */ kapp=2.5; mu=1; eta=1; mm=0.05; /* mm value does not affect sig2_ini if use mm=0.01 here */ sig2_ini=mu; xdat_ini=mm; /* set global variables */ clearg rv,rvvar,rvcov1,rvcov2,err_in; clearg IV1N,htypea,stypea,NWlast; /* ================================================================================ */ /* ================================================================================ */ /* This next stuff is only needed for running simulations. */ /* NOTE: Keep first day errors same regardless of when program is run, i.e. use seed*/ /* When running simulations and simulating data, toss the first toss days of data */ /* and keep the first err_fix days of errors fixed, beyond the toss period. for */ /* simplicity, keep the first toss+err_fix days of errors fixed. */ /* This next data generation creates these fixed errors for a maximal hh and maximal*/ /* Td. the errors are called err_in. */ hh_max=hh; /* i.e. pretty fine data generation! */ Td_max=1152; toss_max=1; err_fix=1; seed=7749; rows1=(toss_max+err_fix)*Td_max*hh_max; err_in=rndns(rows1,2,seed); /* ================================================================================ */ /* ================================================================================ */ /* ******************************************************************************** */ /* ******************************************************************************** */ /* ---------------------------------------------- */ /* PROCEDURES */ /* ---------------------------------------------- */ /* ******************************************************************************** */ /* ******************************************************************************** */ /* various procedures for kernel density calculations */ /* Some kernel functions. All functions take an n x k matrix u as their argument and return an n x k matrix with the function evaluated in each point of x and an n x k matrix d with the derivative of the function in each point of x. k_bw: biweight kernel function k_epan: Epanechnikov kernel k_gauss: Gaussian kernel k_triang: triangular kernel k_rect: rectangular kernel usage: {f,d}=k_bw(u); */ proc (2)=k_bw(u); local select; select = abs(u).<=1; retp( (15/16)*((1-u.^2).^2).*select, -15/4*u.*(1-u.^2).*select); endp; proc (2)=k_gauss(u); retp( pdfn(u), -u.*pdfn(u) ); endp; proc (2)=k_epan_old(u); local c, s; s = abs(u).=0) + (u.<=0)).*s ); endp; /* bandw1 procedure to calculate the optimal bandwidth in kernel estimation of a density. The optimal bandwidth is calculated according to eq. 3.31 of Silverman (1986) usage: h=bandw1(y); input: y: n-vector whose density will be estimated; output: h: scalar, optimal bandwidth choice; */ proc bandw1(y); local s, ys,n, a, iqr, qi1, qi3; if (cols(y)>1); errorlog "input error in bandw1.g: too many columns"; retp( -1 ); endif; s=sqrt(vcx(y)); n=rows(y); ys=sortc(y,1); qi1=round(0.25*n); qi3=round(0.75*n); iqr=ys[qi3]-ys[qi1]; retp( 0.9*minc(s|(iqr/1.34))/n^0.2 ); endp; /* procedure to calculate a univariate kernel density estimate and its derivative usage: {f, d, h} = ukernel(x, z, h, w, &kf); input: x: T vector where density is to be estimated z: n vector with observed data points h: scalar bandwidth, if h<=0 bandwidth is determined by procedure bandw1(z) w: n vector with weights &kf: pointer to weighting function output: f: T vector with estimated density d: T vector with estimated derivative h: scaler bandwidth */ proc (3)=ukernel(x, z,h,w, &kf); local arg, i, n, f, d, k, kff, kfd, pkff,argl,argh,kffl,kffh,fl,fh, hl; local kf:proc; /* error checking here */ if (cols(x)>1); errorlog "ukernel.g: x has too many columns"; retp(-1,-1,-1 ); endif; if (cols(z)>1); errorlog "ukernel.g: z has too many columns"; retp( -1,-1,-1 ); endif; /* initialization */ i = 1; n = rows(x); k = cols(x); /* determine bandwidth */ if (h<=0); h=bandw1(z); endif; f=zeros(n,1); d=zeros(n,1); do while (i<=n); arg = (x[i]-z)/h; {kff, kfd} = kf(arg); f[i] = meanc(kff.*w)/h; d[i] = meanc(kfd.*w)/(h^2); i=i+1; endo; retp( f,d,h ); endp; /* procedure to calculate a multivariate kernel density estimate and its derivative usage: {f, d, h} = mkernel(x, z, h, w, &kf); input: x: T x k matrix where density is to be estimated z: n x k matrix with observed data points h: scalar, bandwidth parameter (same bandwidth for all components), if h<=0 it is determined by eq 4.14 of Silverman w: n vector with weights &kf: pointer to weighting function output: f: T vector with estimated density d: T x k matrix with estimated derivatives h: scalar, bandwidth used */ proc (3)=mkernel(x, z, h, w, &kf); local arg,i,n,f,d,k,kff,kfd,pkff,p,deneq0; local kf:proc; i = 1; n = rows(x); k = cols(x); if (h<=0); p=1/(k+4); h=(4/(k+2))^p*n^(-p); endif; f = zeros(n,1); d = zeros(n,k); do while (i<=n); arg = (x[i,.]-z)/h; {kff, kfd} = kf(arg); pkff = prodc(kff'); f[i] = meanc(pkff.*w)/(h^k); deneq0=kff .==0; kff=kff+deneq0; d[i,.] = meanc((kfd.*pkff.*w)./kff)'/(h^(k+1)); i = i+1; endo; retp( f,d,h ); endp; /* ================================================================================ */ /* ================================================================================ */ /* interval calculation Fhat(u2|.)-Fhat(u1|.) = fb/fu below */ /* */ /* Inputs: */ /* uk - is vector of evaluation points for joint */ /* IV1Nk_0 is IV_{1,N}^(theta_dagger} i.e. the conditioning volat. evaluation point */ /* RMtk is the RM_t data (or IV_t data if sim method used) that is summed across */ /* if stype=0; data is one column corresponding to one long data sample of RM_t */ /* if stype=1; data is Sx2 matrix; S simulations, each of length 2 */ /* with column ordering t-1 and then t */ /* if ktype=1 then use epanichenkov, if =2 use quartic */ proc (4) = int_kern(uk,IV1Nk,RMtk,stype,ktype); local dat1k,dat2k,h1k,h2k,x,z,w,h,fu,du,hu,fb,db,hb,fint,flag_int,funi; /* split RMtk into two columns if stype=0 so that always have two columns */ /* hereafter, regardless of whether doing actual data or simulation version */ if stype==0; dat1k=RMtk[2:rows(RMtk),.]; dat2k=RMtk[1:rows(RMtk)-1,.]; else; dat1k=RMtk[.,2]; dat2k=RMtk[.,1]; endif; /* calculate bandwidth parameters -- there are three options */ h1k=bandw1(dat2k); /* for univariate */ h2k=((rows(dat2k))^(-1/6))*stdc(dat2k); /* for bivariate */ /*h1k=((4/3)^(1/5))*((rows(dat1k))^(-1/5))*stdc(dat1k); altrntv univariate */ /* univariate density calculations */ x=IV1Nk; z=dat2k; w=ones(rows(z),1); h=h1k; if ktype==1; {fu, du, hu} = ukernel(x, z, h, w, &k_epan); endif; if ktype==2; {fu, du, hu} = ukernel(x, z, h, w, &k_bw); endif; funi=fu; /* bivariate density calculations */ /* old!!! old code does 2 dim (bivar) kernal calcas in JoE paper x=(uk~((ones(rows(uk),1))*IV1Nk))*scalfac; z=(dat1k~dat2k)*scalfac; w=ones(rows(z),1); h=h2k*scalfac; {fb, db, hb} = mkernel(x, z, h, w, &k_epan); */ /* new numerator for interval, with index function */ fb=numercal(uk[1],uk[2],ktype); /* interval calculation for a given IV1N value */ fint=fb/fu; flag_int=0; /* if maxc(fint) >1; flag_int=1; print "f_int > 1 !!"; goto finit; endif; */ if fu==0; flag_int=1; print "f_uni == 0 !!"; goto finit; endif; finit: retp(fint,funi,h1k,flag_int); endp; /* ================================================================================ */ /* Numerator of interval calculation Fhat(u2|.)-Fhat(u1|.) = fb for use in the */ /* calculation of fb/fu in procedure int_kern */ /* */ /* Inputs: */ /* uk - called `u' in paper, arg in the indicator function part of the expression */ /* RMtk is the RM_t data (or IV_t data if sim method used) that is summed across */ /* if stype=0; data is one column corresponding to one long data sample of RM_t */ /* if stype=1; data is Sx2 matrix; S simulations, each of length 2 */ /* with column ordering t-1 and then t */ /* IV1Nk_0 is IV_{1,N}^(theta_dagger} i.e. the conditioning volatility */ /* if ktype=1 then use epanichenkov, if =2 use quartic */ proc numercal(uk1,uk2,ktype); /* these are global inputs -> (rv=RM1a,b,c, or d),IV1N,htypea,stypea */ local nk,dat1k,dat2k,h1ka,temp2,fjoint; nk=rows(rv); /* split rv into two columns if stype=0 so that always have two columns */ /* hereafter, regardless of whether doing actual data or simulation version */ if stypea==0; dat1k=rv[2:nk,.]; dat2k=rv[1:nk-1,.]; nk=nk-1; else; dat1k=rv[.,2]; dat2k=rv[.,1]; endif; h1ka=bandw1(dat2k); /* for univariate */ /* construct numerator term */ if ktype==1; temp2= ( (dat1k.>=uk1) .and (dat1k.<=uk2) ).*((1-((IV1N-dat2k)/h1ka)^2).*(abs((IV1N-dat2k)/h1ka) .le 1)); fjoint=(1/nk)*(1/h1ka)*(3/4)*sumc(temp2); endif; if ktype==2; temp2=( (dat1k.>=uk1) .and (dat1k.<=uk2) ).*(((1-((IV1N-dat2k)/h1ka)^2)^2).*(abs((IV1N-dat2k)/h1ka) .le 1)); fjoint=(1/nk)*(1/h1ka)*(15/16)*sumc(temp2); endif; retp(fjoint); endp; /* ================================================================================== */ /* Realised Kernel: The Tukey-Hamming kernel function. The function takes an n x k matrix u as its argument and returns an n x k matrix with the function evaluated in each point of u. usage: f=k_th(u); */ proc k_th(u); retp( (1-cos(pi*(1-u)^2))/2 ); endp; /* Realised Kernel: The autocorrelation function of returns. The function returns a T x 1 vector of covariances at lag l and T x 1 vector of optima bandwidths . usage: {H,gammah}=gammah(Xdata,Xdatastar,l,T,M,Mstar); input: Xdata: TM-vector of log-prices; Xdatastar: TM*-vector of log-prices; l: scalar, number of lags; T: scalar, number of days; M: scalar, number of intradaily observations; Mstar: scalar; output: H: T-vector of optimal bandwidths; gammah: T-vector, daily covariances at lag l. */ proc(2) = gammah(Xdata,Xdatastar,l,T,M,Mstar); local RM1, omega2, sigma2, c, H, prodvt, it, Xdatar, h2; {RM1}=RM1_calcs(Xdata,1,M,T,0); omega2=RM1/(2*M); {RM1}=RM1_calcs(Xdatastar,1,Mstar,T,0); sigma2=RM1; c=((omega2^(1/2))./(sigma2^(1/2)))*5.74; H=c*M^(1/2); H=trunc(H); Xdatar=reshape(Xdata,T,M); prodvt=zeros(T,1); it=1;do until it>T; h2=H[it]; if abs(l)>h2; prodvt[it]=0; else; prodvt[it]= sumc(((Xdatar[it,h2+2:M-h2]-Xdatar[it,h2+1:M-h2-1]).*(Xdatar[it,h2+2-l:M-h2-l]-Xdatar[it,h2+1-l:M-h2-l-1]))'); endif; it=it+1;endo; retp(H,prodvt); endp; /* The realised kernel estimator of integrated variance. The function returns a T x 1 vector of realised estimators. usage: {ker,kervar,kercov1,kercov2}=realker(Xdata,Xdatastar,T,M,Mstar,mom); input: Xdata: TM-vector of log-prices; Xdatastar: TM*-vector of log-prices; T: scalar, number of days; M: scalar, number of intradaily observations; Mstar: scalar; mom: scalar; if 1, some moments are calculated; output: {ker,kervar,kercov1,kercov2}: T-vector of realised estimators. */ proc(4) = realker(Xdata,Xdatastar,T,M,Mstar,mom); local H, H1, gamma0, it, prodvt, ih, prodv, ktilde, ker, kervar, kercov1, kercov2, gamih, gammih, h2; ker={}; kervar={}; kercov1={}; kercov2={}; {H,gamma0}=gammah(Xdata,Xdatastar,0,T,M,Mstar); prodvt=zeros(T,1); it=1;do until it>T; h2= H[it]; prodv=zeros(h2,1); ih=1; do until ih > h2; {H1,gamih}=gammah(Xdata,Xdatastar,ih,T,M,Mstar); {H1,gammih}=gammah(Xdata,Xdatastar,-ih,T,M,Mstar); prodv[ih]=k_th((ih-1)/h2)*(gamih[it]+gammih[it]); ih=ih+1;endo; prodvt[it]=sumc(prodv); it=it+1;endo; ktilde=gamma0+prodvt; ker=(1./(1-2*H/M)).*ktilde; if mom==1; kervar=(ker-meanc(ker))^2; kercov1=(ker[2:T]-meanc(ker)).*(ker[1:T-1]-meanc(ker)); kercov2=(ker[3:T]-meanc(ker)).*(ker[1:T-2]-meanc(ker)); endif; retp(ker,kervar,kercov1,kercov2); endp; /* ================================================================================ */ /* Data Simulation Using Heston Model */ /* */ /* Inputs: */ /* toss is the number of days of extra instantaneous obs to generate to be tossed */ /* to get rid of starting value effects. */ /* err_fix is the number of days of initial observations outputed for which errors */ /* used in data generation are to be fixed. Notice that toss + err_fix days */ /* worth of errors must appear as a global variable called err_in. */ /* TTis the number of days of observations to produce. */ /* Td is the number of observations at the given observed frequency to */ /* simulate during each day, say 8hours*12=96 if using 5 minute observation freq */ /* hhd is the 'instantaneous' increment (e.g. 10000 if 10000 observations */ /* are to be simulated inbetween each increment within each day TT - so in the above*/ /* example, there would be 10000 observations generated WITHIN each 5 minute */ /* observational window, so that to get the 5 minute data, only each 10000th */ /* observation is sampled when producing the X data for output. */ /* sig2_ini and xdat_ini are initial values for volatility and observed variable */ /* The Heston model variant used is: */ /* kapp,mu,eta,mm are as follows: */ /* dxdat(t) = mm*dt + sqrt(sig2)dW(1,t) */ /* dsig2(t) = kapp(mu-sig2(t))dt +eta*sqrt(sig2)dW(2,t) , kapp > 0 */ /* f_inst is a flag =1 if want to output instantaneous sigma2 values, of which */ /* first err_fix of these are constructed using fixed errors across simulations */ /* Notice that for given initial values, `toss' initial observations are discarded */ proc (3) = sim_data(toss,err_fix,TT,Td,sig2_ini,xdat_ini,hhd,kapp,mu,eta,mm,f_inst); local i,sig2,xdat,sig2L,xdatL,err,outsig2,outxdat,cmplx_fl,i_tot1,i_tot2,sig2sum,Tdhh; TT=TT+toss; /* increase number of days to account for tossed days */ outsig2={}; outxdat={}; sig2sum=0; sig2L= sig2_ini; xdatL= xdat_ini; /* Notice below that we are actually fixing the first err_fix+toss */ /* days of errors, rather than just err_fix days. however, as toss days are */ /* discarded before outputing any data, then we are effectively only fixing */ /* the first err_fix days M = Td = intraday obs. */ i_tot1=TT*Td*hhd; i_tot2=(toss+err_fix)*Td*hhd; Tdhh=1/(Td*hhd); i=1; do while i<=i_tot1; if i<=i_tot2; err=err_in[i,1:2]'; else; err=rndn(1,2)'; endif; sig2 = sig2L + (kapp*(mu-sig2L)*Tdhh) - ((1/4)*Tdhh*(eta^2)) + (eta*sqrt(sig2L)*sqrt(Tdhh)*(err[1])) + ((1/4)*(eta^2)*Tdhh*(err[1]^2)); xdat = xdatL + mm*Tdhh + (sqrt(sig2)*sqrt(Tdhh)*err[2]); /* error increment is inverse of num obs in a day that'r simulated = (hhd*Td) */ sig2sum=sig2sum+sig2; sig2L=sig2; xdatL=xdat; /*print xdat~sig2;*/ /* output TT days, each with Td observations, for total TT*Td obs. of X & sig */ if i%(hhd)==0; outxdat=outxdat|xdat; if f_inst==1; outsig2=outsig2|(sig2sum/hhd); /* integrated Td frequency per day vol */ endif; sig2sum=0; cmplx_fl=iscplx(xdat); if cmplx_fl==1; goto doneit1; endif; endif; i=i+1; endo; /* calculate the pseudo true integrated volatility (IV) for TT days! */ if f_inst==1; outsig2=reshape(outsig2,TT,Td); outsig2=meanc(outsig2'); /* actual IV in a vector of dimension (TT x 1) */ outsig2=outsig2[toss+1:TT]; endif; if (sumc(outsig2 .gt 0)) == rows(outsig2); else; cmplx_fl==1; print "Negative sigma2==========================="; goto doneit1; endif; /* collect the X data to output */ outxdat=outxdat[(toss*Td)+1:TT*Td]; doneit1: /*print " ==="; print outxdat; print "====="; print outsig2;*/ retp(outxdat,outsig2,cmplx_fl); endp; /* ================================================================================ */ /* HAC Newey-West COV estimator */ /* */ /* Purpose: Calculate covariance matrix of sqrt(T)*sample average. */ /* */ /* Input: hhat - NxK matrix of moment functions */ /* pp - order of autoregression */ /* PrintIt - if 1 then the results will be printed */ /* */ /* Output: Shat - covariance matrix of sumc( hhat/sqrt(T) ), */ /* that is the covariance matrix of sqrt(T)*mean(hhat) */ /* Note: Set pp=q=int(rows(hhat)^(1/6)) in main program. */ proc Nwywest(hhat,pp,PrintIt); local N,df,m,Omega0,jj,Shat,Omegaj,titl,flag1; hhat = hhat - meanc(hhat)'; /*Normalizing to E[hhat]=0*/ N = rows(hhat); df = N - cols(hhat); /*Degrees of freedom*/ m = minc( pp|(N-2) ); Omega0 = hhat'hhat/N; /*(KxN)*(NxK)*/ jj = 1; Shat = Omega0; do until jj > pp; Omegaj = hhat[jj+1:N,.]'hhat[1:N-jj,.]/N; /*Sum[h(t)*h(t-jj)',t=jj+1,T]*/ Shat = Shat + ( 1 - jj/(m+1) ) * (Omegaj + Omegaj'); jj = jj + 1; endo; if PrintIt == 1; "\lNewey-West Covariance matrix of sqrt(T)*sample mean"; flag1 = printfm( Shat,1,("*.*lf")~8~3 ); endif; retp(Shat); endp; /* ================================================================================ */ /* (S)GMM Estimation for the Heston Model */ /* */ /* Inputs: */ /* */ /* */ /* */ /* The Heston model variant used is: */ /* dxdat(t) = mm*dt + sqrt(sig2)dW(1,t) */ /* dsig2(t) = kapp(mu-sig2(t))dt +eta*sqrt(sig2)dW(2,t) , kapp > 0 */ /* Notice that the initial parameter values are ================================== */ /* Notice also that the relevant commands to call the GMM estimator in the main is: */ /* {bhat,min,grad,codefirst}=optmum(&innprod,(mu_0|eta_0|kapp_)); */ /* mu=bhat[1];eta=bhat[2];kapp=bhat[3]; */ /* Notice that require global use and precalculation of rv rvvar rvcov1 rvcov2 */ /* Notice that as there are 4 moments here, can also estimate mm if modify to SGMM. */ /* Notice that in order to generalize to SGMM, must simply replace the closed form */ /* moments with rv,rvvar,rvcov1,rvcov2 type moments constructed based on simulated */ /* data. Alternatively, we could also grid across mm and use SGMM for the rest, */ /* choosing mm that ultimately yields lowest optimum value for minimization problem */ proc sgmm(b); local ivmeanopt,ivvaropt,ivcov1opt,ivcov2opt,g1subT,g2subT,g3subT,g4subT,gsubT,gsubTall, NW,NWlags,invNW,flagnw,chucksim; /* calculate closed form moments */ /* here, b[1]=mu, b[2]=eta, b[3]=kapp */ ivmeanopt=b[1]; ivvaropt=(b[1]*b[2]^2)/(b[3]^3)*(exp(-b[3])+b[3]-1); ivcov1opt=(b[1]*b[2]^2)/(2*b[3])*((1-exp(-b[3]))^2)/(b[3]^2); ivcov2opt=(b[1]*b[2]^2)/(2*b[3])*exp(-b[3])* ((1-exp(-b[3]))^2)/(b[3]^2); /* use data based moments to construct components over which to minimize */ /* bring in simulated rv,rvvar, etc. in next lines if doing SGMM and use to replace */ /* ivmeanopt, ivvaropt, etc. with simulation based counterparts */ g1subT=meanc(rv)- ivmeanopt; g2subT=meanc(rvvar)- ivvaropt; g3subT=meanc(rvcov1)- ivcov1opt; g4subT=meanc(rvcov2)- ivcov2opt; gsubT=(g1subT|g2subT|g3subT|g4subT); /* calculate weight matrix version and output result */ gsubTall=((rv[3:rows(rv)]-ivmeanopt) ~ (rvvar[3:rows(rv)] - ivvaropt) ~ (rvcov1[2:rows(rvcov1)]-ivcov1opt) ~ (rvcov2[1:rows(rvcov2)]-ivcov2opt) ); NWlags=int(rows(rv)^(1/6)); /*NWlags=0;*/ if NWlags <1; print "ERROR!!! NWlags = " NWlags; endif; NW=nwywest(gsubTall,NWlags,0); /* NWlags is number of lags to use */ trap 1; invNW=inv(NW); flagnw = scalerr(invNW); if flagnw == 0; NWlast=NW; retp(gsubT'*inv(NW)*gsubT); else; print "a non-invertible NW!!"; retp(gsubT'*inv(NWlast)*gsubT); endif; /* calculate non-weight matrix version and output result */ /*retp(gsubT'*gsubT);*/ endp; /* ================================================================================ */ /* RM1 Calculations */ /* */ /* Inputs: */ /* Xdata -- This is the raw log level data */ /* int_RM is the number of intraday observations = Td */ /* T_RM is the sample length of realized measure (RM) output series */ /* mom_yn is a flag -- 0 = no moment data calculated, -- 1 = moment data calculated */ proc (1) = RM1_calcs(Xdata,RMoption,int_RM,T_RM,mom_yn); local RM1; RM1={}; /* calculate standard realized volatility (RM1), length T_RM */ Xdata = reshape(Xdata,T_RM,int_RM); RM1=sumc(((Xdata[.,2:int_RM]-Xdata[.,1:int_RM-1])^2)'); retp(RM1); endp; /* ================================================================================ */ /* RM Calculations */ /* */ /* Inputs: */ /* Xdata -- This is the raw log level data */ /* RMoption -- 1 to caculate only realized volatility measure */ /* -- 2 to calculate only bi-power volatility measure */ /* as well as tripower variation */ /* -- 3 to calculate only old sub-sample volatility measure - ReStud paper */ /* -- 4 to calculate only sub-sample volatility measure */ /* -- 5 to calculate realised kernel type volatility measure */ /* -- 6 to calculate all three measures, in above order */ /* int_RM is the number of intraday observations = Td */ /* T_RM is the sample length of realized measure (RM) output series */ /* mom_yn is a flag -- 0 = no moment data calculated, -- 1 = moment data calculated */ proc (24) = RM_calcs(Xdata,RMoption,int_RM,T_RM,mom_yn,Xstar,Mstar); local RM1,RM1var,RM1cov1,RM1cov2,RM2,RM2var,RM2cov1,RM2cov2,RM9,RM9var,RM9cov1,RM9cov2, RM3,RM3var,RM3cov1,RM3cov2,RM4,RM4var,RM4cov1,RM4cov2,M,kbar,kvar,subs,ir,RM_i,a_i,l,B, subvecrv,inds,subvecss,rvssir,rvavg,nui,RM5,RM5var,RM5cov1,RM5cov2; RM1={}; RM1var={}; RM1cov1={}; RM1cov2={}; RM2={}; RM2var={}; RM2cov1={}; RM2cov2={}; RM9={}; RM9var={}; RM9cov1={}; RM9cov2={}; RM3={}; RM3var={}; RM3cov1={}; RM3cov2={}; RM4={}; RM4var={}; RM4cov1={}; RM4cov2={}; RM5={}; RM5var={}; RM5cov1={}; RM5cov2={}; /* calculate standard realized volatility (RM1), length T_RM */ Xdata = reshape(Xdata,T_RM,int_RM); if (RMoption==1 or RMoption==6); RM1=sumc(((Xdata[.,2:int_RM]-Xdata[.,1:int_RM-1])^2)'); if mom_yn==1; RM1var=(RM1-meanc(RM1))^2; RM1cov1=(RM1[2:T_RM]-meanc(RM1)).*(RM1[1:T_RM-1]-meanc(RM1)); RM1cov2=(RM1[3:T_RM]-meanc(RM1)).*(RM1[1:T_RM-2]-meanc(RM1)); endif; endif; /* calculate bi-power variation realized volatility (RM2), length T_RM */ if (RMoption==2 or RMoption==6); RM2=(1/(sqrt(2)*gamma(1)/gamma(.5))^2)* sumc((abs(Xdata[.,3:int_RM]-Xdata[.,2:int_RM-1]).*abs(Xdata[.,2:int_RM-1]-Xdata[.,1:int_RM-2]))'); if mom_yn==1; RM2var=(RM2-meanc(RM2))^2; RM2cov1=(RM2[2:T_RM]-meanc(RM2)).*(RM2[1:T_RM-1]-meanc(RM2)); RM2cov2=(RM2[3:T_RM]-meanc(RM2)).*(RM2[1:T_RM-2]-meanc(RM2)); endif; RM9=( 1 / ( (2^(1/3))*gamma(5/6)/gamma(.5) )^3 )* sumc( ( ((abs(Xdata[.,4:int_RM]-Xdata[.,3:int_RM-1]))^(2/3)) .* ((abs(Xdata[.,3:int_RM-1]-Xdata[.,2:int_RM-2]))^(2/3)) .* ((abs(Xdata[.,2:int_RM-2]-Xdata[.,1:int_RM-3]))^(2/3)) )'); if mom_yn==1; RM9var=(RM9-meanc(RM9))^2; RM9cov1=(RM9[2:T_RM]-meanc(RM9)).*(RM9[1:T_RM-1]-meanc(RM9)); RM9cov2=(RM9[3:T_RM]-meanc(RM9)).*(RM9[1:T_RM-2]-meanc(RM9)); endif; endif; /* calculate old sub-sampled realized volatility (RM3), length T_RM */ if (RMoption==3 or RMoption==6); l=trunc(int_RM/10); B=trunc(int_RM/l); subvecrv=zeros(T_RM,B); ir=1; do until ir>B; inds=seqa(ir,B,l); subvecss=submat(Xdata,0,inds); rvssir=sumc(((subvecss[.,2:l]-subvecss[.,1:l-1])^2)'); subvecrv[.,ir]=rvssir; ir=ir+1; endo; rvavg=meanc(subvecrv'); nui=sumc(((Xdata[.,2:int_RM]-Xdata[.,1:int_RM-1])^2)')/(2*int_RM); RM3=rvavg-2*l*nui; if mom_yn==1; RM3var=(RM3-meanc(RM3))^2; RM3cov1=(RM3[2:T_RM]-meanc(RM3)).*(RM3[1:T_RM-1]-meanc(RM3)); RM3cov2=(RM3[3:T_RM]-meanc(RM3)).*(RM3[1:T_RM-2]-meanc(RM3)); endif; endif; /* calculate sub-sampled realized volatility (RM4), length T_RM */ if (RMoption==4 or RMoption==6); M=trunc(sqrt(int_RM)); kbar=(M+1)/2; kvar=(M^2-1)/12; subs=zeros(M,T_RM); ir=1; do until ir>M; RM_i=sumc(((Xdata[.,1+ir:cols(Xdata)]-Xdata[.,1:cols(Xdata)-ir])^2)'); RM_i=RM_i/ir; a_i=(ir*(ir-kbar))/(M*kvar); subs[ir,.]=(a_i*RM_i)'; ir=ir+1; endo; RM4=sumc(subs)+2*(sumc(((Xdata[.,2:int_RM]-Xdata[.,1:int_RM-1])^2)')/(2*int_RM)); if mom_yn==1; RM4var=(RM4-meanc(RM4))^2; RM4cov1=(RM4[2:T_RM]-meanc(RM4)).*(RM4[1:T_RM-1]-meanc(RM4)); RM4cov2=(RM4[3:T_RM]-meanc(RM4)).*(RM4[1:T_RM-2]-meanc(RM4)); endif; endif; if (RMoption==5 or RMoption==6); {RM5,RM5var,RM5cov1,RM5cov2}=realker(Xdata,Xstar,T_RM,int_RM,Mstar,1); endif; retp(RM1,RM1var,RM1cov1,RM1cov2,RM2,RM2var,RM2cov1,RM2cov2,RM9,RM9var,RM9cov1,RM9cov2, RM3,RM3var,RM3cov1,RM3cov2,RM4,RM4var,RM4cov1,RM4cov2,RM5,RM5var,RM5cov1,RM5cov2); endp; /* ================================================================================ */ /* Predictive Interval Calculation I - `pseudo true interval' */ /* */ /* all parameters are described in data simulation procedure and are to be fed into */ /* data simulation procedure directly, except u1, u2, and SS */ /* */ /* Inputs: */ /* u1 and u2 are the upper and lower interval bounds */ /* SS is the number of simulations to be run, its assumed that they are all 2 days */ /* long, otherwise need to change T to be greater than 2 and adjust procedure */ proc (3) = predint1(toss,err_fix,Tpred1,Td,sig2_ini,xdat_ini,hhp,kapp,mu,eta, mm,f_inst,u1,u2,SS); local inter_1,jjj,junki,out_sig2,flag2; inter_1=0; /* Simulate data using parameters of the given DGP i.e. true values */ /* and form confidence interval for `pseudo true" IV values */ jjj=1; do while jjj <= SS; {junki,out_sig2,flag2}= sim_data(toss,err_fix,Tpred1,Td,sig2_ini,xdat_ini,hhp,kapp,mu,eta,mm,f_inst); if flag2==1; goto doneit2; endif; inter_1 = inter_1+( (1/SS)*( (u1<=out_sig2[2]) and (out_sig2[2]<=u2) ) ); jjj=jjj+1; endo; doneit2: retp(inter_1,out_sig2[1],flag2); endp; /* ================================================================================ */ /* Predictive Interval Calculation II - `realized measure based interval' */ /* */ /* all parameters are described in data simulation procedure and are to be fed into */ /* data simulation procedure directly, except u1, u2, and SS */ /* */ /* Inputs: */ /* u1 and u2 are the upper and lower interval bounds */ /* IV_1N is the fixed value of integrated volatility used in the simulations */ /* to be held as the fixed conditioning variable across all three interval types */ /* To get this changed for doing empirical work, the feed in for IV_1N which is */ /* subsequently fed into the kernel density estimator should be instead RM_T,M, */ /* which is calculated from the long data simulation done in this procedure. This */ /* is the same RM_T,M which should be used when constructing the 3rd type of intrvl */ /* i.e. simulation based. */ /* T is the number of days worth of X observations (Td of them per day) to be run, */ /* and now this must be much greater than 2 !!!!!! */ /* long, otherwise need to change T to be greater than 2 and adjust procedure */ /* */ /* Note: Here, f_inst should be set to 0!! */ proc (4) = predint2(RMmeas,u1,u2,IV_1N,ktype); local inter_2,fg_int2,uk1,uni_2,h_out; uk1=u1|u2; {inter_2,uni_2,h_out,fg_int2}=int_kern(uk1,IV_1N,RMmeas,0,ktype); retp(inter_2,uni_2,h_out,fg_int2); endp; /* ================================================================================ */ /* Predictive Interval Calculation III - `simulation based interval' */ /* */ /* all parameters are described in data simulation procedure and are to be fed into */ /* data simulation procedure directly, except u1, u2, and SS */ /* */ /* Inputs: */ /* u1 and u2 are the upper and lower interval bounds */ /* SS is the number of simulations to be run, its assumed that they are all 3 days */ /* long, otherwise need to change T to be greater than 3 and adjust procedure */ proc (5) = predint3(toss,err_fix,Tpred3,Td,sig2_ini,xdat_ini,hh,kapp_0, mu_0,eta_0,mm_0,f_inst,u1,u2,SS1,IV_1N,ktype); local b0,bhat,minout,gradout,codeout,mu_1,eta_1,kapp_1,IVsim,junky,o_sig2,jjj, inter_3,flag3,uk1,fg_int3,uni_3,h_out; inter_3=0; /* estimate parameters using GMM */ b0=mu_0|eta_0|kapp_0; /* i commented the line below to get this program running on a machine without op0tmum on it */ /* uncomment to use this procedure!! */ /*{bhat,minout,gradout,codeout}=optmum(&sgmm,b0);*/ /*print "b0~bhat"; print (b0~bhat);*/ mu_1=bhat[1];eta_1=bhat[2];kapp_1=bhat[3]; /* Simulate data using parameters of the given DGP i.e. true values */ IVsim={}; jjj=1; do while jjj <= SS1; {junky,o_sig2,flag3}= sim_data(toss,err_fix,Tpred3,Td,sig2_ini,xdat_ini,hh,kapp_1,mu_1,eta_1,mm_0,f_inst); if flag3==1; goto doneit3; endif; if rows( o_sig2) > 3; print "in int3 o_sig2 has rows = " rows(o_sig2) " where T was " Tpred3; /*print o_sig2;*/ endif; IVsim=IVsim|o_sig2[2:3]'; /* toss 1st out_sig2 value asis fixed for given parm vctr */ jjj=jjj+1; endo; /* now, set the global rv to be IVsim rather than the RM measure used in the GMM */ /* parameter estimation above, hence facillitating the kernel density and interval */ /* calculartions using the new IVsim measure */ rv=IVsim; /* construct interval */ uk1=u1|u2; {inter_3,uni_3,h_out,fg_int3}=int_kern(uk1,IV_1N,IVsim,0,ktype); doneit3: retp(inter_3,uni_3,h_out,flag3,fg_int3); endp; /* ******************************************************************************** */ /* ******************************************************************************** */ /* ---------------------------------------------- */ /* MAIN PROGRAM */ /* ---------------------------------------------- */ /* ******************************************************************************** */ /* ******************************************************************************** */ /* ================================================================================ */ /* Notes!!!! */ /* note, {T_input=1000, Td_input=100, h=1/2} seems to work pretty well!!! */ /* even {T_input=100, Td_input=50, h=1/2} seems to work pretty well!!! */ /* Td = 10 yields too crappy RM for use in SGMM in simulated interval calculations */ /* these next two parms tell whether to fix an initial day of obs via fixing the */ /* how many days of initialization data to use (toss) and errs to fix (err_fix) */ toss=1; err_fix=1; allout1={}; allout2={}; allout3={}; allout4={}; allout5={}; allout6={}; /* */ /* start simulations here!!!! */ /* */ cntchuck=0; /* counter for checked sims due to complex data being generated */ /* Step -1: Form large sim dataset: get vols to get idea of values to select for */ /* for u_1 and u_2 based on variance of IV sequence of this initial sim */ chucksim1: flag=0; T_input=100; Td_input=1152; {junk,ot_sig2,flag}= sim_data(toss,err_fix,T_input,Td_input,sig2_ini,xdat_ini,hh,kapp,mu,eta,mm,1); print "ot_sig2 Initial u1 - u2 calculation"; print ot_sig2; if flag==1; print "problem with complex numbers data generation in Step 1!!"; goto chucksim1; endif; V_sig2=stdc(ot_sig2); M_sig2=meanc(ot_sig2); u1_input=M_sig2-(num*V_sig2); u2_input=M_sig2+(num*V_sig2); print "--------------------------------------"; print " u1 mean(sig) u2 "; print (u1_input~M_sig2~u2_input); print "Xdat mean, min, max"; print meanc(junk)~minc(junk)~maxc(junk); print "ot_sig2 mean, min, max"; print meanc(ot_sig2)~minc(ot_sig2)~maxc(ot_sig2); print "--------------------------------------"; /* ============================================================================================ */ /* Step 0: 'Pseudo True Interval' */ /* This step not used in any empirical application */ SS_input=SS_in1; /* this is the number of simulations to use to calculate the `pseudo true' intrvl */ f_inst=1; /* a flag to be set to 1 if want simulated IV values outputed */ /* T=2 days here for sim length (in days) for `pseudo true intrvl, and hence the `2' below */ chucksim2: flag=0; T_input=2; Td_input=1152; {intvl_1,IV1N_in,flag}=predint1(toss,err_fix,T_input,Td_input,sig2_ini,xdat_ini, hh,kapp,mu,eta,mm,f_inst,u1_input,u2_input,SS_input); if flag==1; print "problem with complex numbers data generation in Step 2!!"; goto chucksim2; endif; print "-------Interval probability - pseudo true--------------------"; print intvl_1~IV1N_in; print "-------------------------------------"; /* ==============================================================================================*/ /* start main looping !!! */ T_i=1; /* number of days in samples */ do while T_i<=T_tot; if T_i==1; T_input=100; elseif T_i==2; T_input=300; elseif T_i==3; T_input=500; elseif T_i==4; T_input=1000; else; T_input=1000; endif; /* output storage matrix */ int_mat1=zeros(simtot,13*M_tot); int_mat2=zeros(simtot,12*M_tot); int_mat3=zeros(simtot,12*M_tot); int_mat4=zeros(simtot,6*M_tot); int_mat5=zeros(simtot,6*M_tot); Sim_i=1; /* simulations loop */ do while Sim_i <= simtot; restart1: flag=0; /* Step 1: Form a large simulation dataset from which to run simulations */ Td_input=M_max; {ot_X_in,ot_sig2,flag}=sim_data(toss,err_fix,T_input,Td_input,sig2_ini,xdat_ini,hh,kapp,mu,eta,mm,1); M_i=1; /* number of obs observed per day is Td_input, so total available smpl is T_input*Td_input */ do while M_i<=M_tot; if M_i==1; Td_input=72; elseif M_i==2; Td_input=144; elseif M_i==3; Td_input=288; elseif M_i==4; Td_input=576; else; Td_input=1152; endif; Mstar=48; /* ------------- */ /* Extract dataset of appropriate number of intraday obs from the long dataset generated above */ vec2={}; vec3={}; vec1=1; if Td_input= 0) == 13; /* int_mat1 keeps track of interval probabilities */ int_mat1[sim_i,((M_i-1)*13)+1:((M_i-1)*13)+13]= (intvl_1~intvl_2a~intvl_2b~intvl_2i~intvl_2c~intvl_2d~intvl_2e~intvl_3a~intvl_3b~intvl_3i~intvl_3c~intvl_3d~intvl_3e); /* int_mat2 keeps track of scaled random variables that are asmp. N(0,1) */ int_mat2[sim_i,((M_i-1)*12)+1:((M_i-1)*12)+12]= (SN_int2a~SN_int2b~SN_int2i~SN_int2c~SN_int2d~SN_int2e~SN_int3a~SN_int3b~SN_int3i~SN_int3c~SN_int3d~SN_int3e); /* intmat2 keeps track of 2nd moments of measurement errors */ int_mat4[sim_i,((M_i-1)*6)+1:((M_i-1)*6)+6]=merr2; int_mat5[sim_i,((M_i-1)*6)+1:((M_i-1)*6)+6]=merr4; else; print "problem with complex numbers data generation output gathering step!!"; flag=1; goto chucksim; endif; M_i=M_i+1; endo; print "Simulation # " Sim_i " is done! for T_i = " T_i; Sim_i=Sim_i+1; endo; /* gather together intermediate printable output */ oi=1; do while oi<=M_tot; int_mat=int_mat1[.,((oi-1)*13)+1:((oi-1)*13)+13]; intmat2a=int_mat2[.,((oi-1)*12)+1:((oi-1)*12)+12]; print "All intervals for " simtot " simulations are the following, for M_i = " oi; print "Sample in days is T_i = " T_i; print "Results are based upon a " num " standard error intrvl around mean of the pseudo true IV"; print "=============================================================="; print int_mat; print "=============================================================="; print "All normal variates for " simtot " simulations are the following, for M_i = " oi; print "Sample in days is T_i = " T_i; print "Results are based upon a " num " standard error intrvl around mean of the pseudo true IV"; print "=============================================================="; print intmat2a; print "=============================================================="; /* collect together output for tables in paper */ /* relative absolute probability bias measures */ rbias=(abs(int_mat[.,2:13]-int_mat[.,1])./int_mat[.,1])*100; rbias_se=(stdc(rbias))'; rbias=(meanc(rbias))'; allout1=allout1|rbias; allout1=allout1|rbias_se; /* absolute probability bias measures */ bias=(abs(int_mat[.,2:13]-int_mat[.,1])); bias_se=(stdc(bias))'; bias=(meanc(bias))'; allout2=allout2|(int_mat[1,1]~bias); allout2=allout2|(0.0000~bias_se); /* average probability measures */ aprob=int_mat[.,2:13]; aprob_se=(stdc(aprob))'; aprob=(meanc(aprob))'; allout3=allout3|(int_mat[1,1]~aprob); allout3=allout3|(0.0000~aprob_se); /* median probability measures */ mprob=int_mat[.,2:13]; mprob_se=(stdc(mprob))'; mprob=(median(mprob))'; allout4=allout4|(int_mat[1,1]~mprob); allout4=allout4|(0.0000~mprob_se); /* rejection frequencies of normality at 5%, 10% level */ allout5=allout5|((sumc(abs(intmat2a).>1.96))'/rows(intmat2a)); allout6=allout6|((sumc(abs(intmat2a).>1.68))'/rows(intmat2a)); oi=oi+1; endo; /* Print intermediate tabulated output after each sample size done */ /* Part I -- Relative Mean Absolute Probability Bias and standard error of relative bias) */ print "/* Part I -- Relative Mean Absolute Probability Bias and standard error thereofs) */"; print "Intermediate Tabulated Final Output"; print "Output has all intraday rows (i.e. Mx2 fo them), for a first T, then again for the next T, etc."; print "Each pair of rows is rbias in row 1 and stderr(rbias) in row 2"; print "==============================================================="; print allout1; print "==============================================================="; /* Part II -- Mean Absolute Probability Bias and standard error thereof) */ print "/* Part II -- Mean Absolute Probability Bias and standard error thereof) */"; print "Intermediate Tabulated Final Output"; print "Output has all intraday rows (i.e. Mx2 fo them), for a first T, then again for the next T, etc."; print "Each pair of rows is bias in row 1 and stderr thereof in row 2"; print "==============================================================="; print allout2; print "==============================================================="; /* Part III -- Mean Probability and standard error thereof) */ print "/* Part III -- Mean Probability and standard error thereof) */"; print "Intermediate Tabulated Final Output"; print "Output has all intraday rows (i.e. Mx2 fo them), for a first T, then again for the next T, etc."; print "Each pair of rows is bias in row 1 and stderr thereof in row 2"; print "==============================================================="; print allout3; print "==============================================================="; /* Part IV -- Median Probability and standard error) */ print "/* Part IV -- Median Probability and standard error) */"; print "Intermediate Tabulated Final Output"; print "Output has all intraday rows (i.e. Mx2 fo them), for a first T, then again for the next T, etc."; print "Each pair of rows is bias in row 1 and stderr thereof in row 2"; print "==============================================================="; print allout4; print "==============================================================="; /* Part V -- Normal Random Variable Empirical Size and Power Results */ print "/* Part V -- Normal Random Variable Empirical Size and Power Results 5% */"; print "Intermediate Tabulated Final Output"; print "Output has all intraday rows (i.e. Mx2 fo them), for a first T, then again for the next T, etc."; print "Each pair of rows is bias in row 1 and stderr thereof in row 2"; print "==============================================================="; print allout5; print "==============================================================="; print "/* Part V -- Normal Random Variable Empirical Size and Power Results 10% */"; print "Intermediate Tabulated Final Output"; print "Output has all intraday rows (i.e. Mx2 fo them), for a first T, then again for the next T, etc."; print "Each pair of rows is bias in row 1 and stderr thereof in row 2"; print "==============================================================="; print allout6; print "==============================================================="; print "/* Part VI -- 2nd and 4th moments of measurement error */"; print "2nd"; int_mat4=(meanc(int_mat4))'; print reshape(int_mat4,m_tot,5); print " "; print "4th"; int_mat5=(meanc(int_mat5))'; print reshape(int_mat5,m_tot,5); T_i=T_i+1; endo; /* this next stuff should be at VERY end of program */ chucksim: if flag==1; cntchuck=cntchuck+1; goto restart1; endif; if flagg==1; cntchuck=cntchuck+1; goto restart1; endif; print "In total, " cntchuck " sims were chucked!!"; output off;