\documentclass[12pt,a4paper,final]{article}

\usepackage{amsfonts}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{commath} % for abs{}
\usepackage[margin=2.5cm]{geometry}
\usepackage[longnamesfirst]{natbib}
\usepackage[T1]{fontenc}
\usepackage{enumitem}
\usepackage{ntheorem}

\theoremstyle{break}
\newtheorem{theorem}{Theorem}
\newtheorem{assumption}{Assumption}

\newcounter{specialE}
\renewcommand{\thespecialE}{E}
\newtheorem{Eassumption}[specialE]{Assumption}

\DeclareMathOperator*\plim{plim}

\numberwithin{equation}{section}

\newcommand{\rmi}{{\itshape (i)}}
\newcommand{\rmii}{{\itshape (ii)}}
\newcommand{\rmiii}{{\itshape (iii)}}

\makeatletter
\let\@fnsymbol\@alph
\makeatother

% cross references to main file
\usepackage{xr}
\externaldocument{ChristopeitMassmann2016final}

% use blackboard bold type for expectation operators?
\newcommand{\E}{\mathbf{E}}

\usepackage[bookmarksopenlevel=1,bookmarksnumbered,pdftex,colorlinks=false]{hyperref}


\begin{document}

\title{Online Supplement for\\\emph{Estimating structural parameters in regression models with adaptive learning}}
\author{Norbert Christopeit \\ University of Bonn \and Michael Massmann\thanks{Corresponding author: Michael Massmann, WHU -- Otto Beisheim School of Management, Burgplatz 2, 56179 Vallendar, Germany, phone: +49 (0)261 6509370,
    email: \href{mailto:m.massmann@vu.nl}{\texttt{michael.massmann@whu.edu}}.} \\ WHU -- Otto Beisheim School of Management\\and\\Vrije Universiteit Amsterdam}
\date{\today}

\maketitle

\tableofcontents

\newpage


\appendix
\setcounter{section}{2}



\section{Time-varying regressors}
\label{sec:TVregr}

The assumption in this paper that the exogenous regressor $x_{t}$ is constant serves the purpose of analytical tractability, especially in order to facilitate the examination of the asymptotic behaviour of $a_t$, at least to such
an extent as is needed for the treatment of the EEP. Upon inspecting the proofs, however, it is apparent that time-varying regressors $ x_{t} $ are likely to lead to complications in the analysis. In particular, in the case of
constant gain learning, the analogue of \eqref{eq:a_CGall} for general time-varying $x_t$ is
\begin{equation*}
  a_t = \left[1 - c \frac{x_t^2}{r_t} \right] a_{t-1} + c \frac{x_t^2}{r_t} + \gamma  \frac{x_t}{r_t} \varepsilon_t
\end{equation*}
such that $x_t$ may cause the autoregressive coefficient to switch between the stable, unit root or explosive regimes. This issue will not arise in the case of decreasing gain learning as long as $a_t$ converges to the REE
$\alpha$. On the other hand, the singularity of the asymptotic second moment matrix will persist, see equation \eqref{eq:asymp2ndmoment}. In order derive substantial results in these settings, strong assumptions will have be
imposed on the regressors. One such restriction is the case of the regressors tending to an equilibrium value which, not surprisingly, leads to essentially the same results as in Theorems \ref{thIEPconst}-\ref{th4}.\medskip

\begin{Eassumption}
  \label{ass:E}
  The sequence $x_{t}$ tends to an equilibrium value $x$: $\lim_{t\rightarrow \infty }x_{t}=x. $
\end{Eassumption}

Without loss of generality, we may again assume that $x=1.$ The $x_{t}$ are taken to be deterministic for expositional simplicity. Identical calculations to those below would result for stochastic regressors if, for instance, \rmi
\ the regressors are strictly exogenous, i.e.\ the sequence $ x_{t} $ is independent of the error terms $\varepsilon _{t} ,$ and \rmii \ Assumption \ref{ass:E} holds with probability one.



\subsection{Constant gain}

Reconsider the recursion of $r_t $ in \eqref{2.12b} with a constant gain $\gamma_t  = \gamma$:
\begin{equation}
r_{t}=\left( 1-\gamma \right) r_{t-1}+\gamma x_{t}^{2}.  \label{r}
\end{equation}
With the  solution of \eqref{r} given by
\begin{equation*}
r_{t}=\rho ^{t}r_{0}+\gamma \sum_{n=0}^{t-1}\rho ^{n}x_{t-n}^{2} %=\rho^{t}r_{0}+\gamma \rho ^{t}\sum_{k=1}^{t}\rho ^{-k}x_{k}^{2}.  \label{r2}
\end{equation*}%
it follows that $r_{t}$ tends to the equilibrium value $r=x^{2}=1$, provided that $\gamma \in \left( 0,1\right) $.
Substituting this into the dynamics of $a_{t}$ in \eqref{2.12a} yields the recursion in \eqref{eq:a_CGall}, as indeed was obtained under the assumption of a constant $x_t = x$. As a consequence, the asymptotics of
$a_{t}$ are also the same.

Regarding the EEP, note that the structural equation is given by
\begin{equation*}
y_{t} =\delta x_{t}+\beta a_{t-1}x_{t}+\varepsilon _{t}
\end{equation*}%
or
\begin{equation*}
y^x_{t}=\delta _{t}+\beta a_{t-1}+\varepsilon _{t}^{x},
\end{equation*}%
with $y^x_t = y_t/x_t$ and $\varepsilon _{t}^{x}=\varepsilon _{t}/x_{t}.$ Remembering that $x_t \rightarrow 1$, it can be shown that passing from $\varepsilon _{t}^{x}$ to $\varepsilon _{t}$ does not affect the behaviour of the
OLS estimator, so that all the results for the EEP in the case of constant gain remain valid.



\subsection{Decreasing gain}

Reconsider the recursion of $r_t $ in \eqref{2.12b} with a decreasing gain sequence $\gamma_t  = \gamma/t$, i.e.\
\begin{equation*}
r_{t}=\left( 1-\frac{\gamma }{t}\right) r_{t-1}+\frac{\gamma }{t}x_{t}^{2}.
\end{equation*}%
This is of the same form as equation \eqref{eq. for a*}. Hence, performing the same analysis on $r_t$ as is done on $a_{t}$ in Appendices \ref{sec:case-cless12} and \ref{sec:case-cgeq12} shows that, for every $\gamma >0,$%
\begin{equation*}
\lim_{t\rightarrow \infty }r_{t}=r\gamma x^{2}=r\gamma
\end{equation*}%
for some positive number $r$. % (cf. (B.16) - (B.19) with $\varepsilon _{i}$ replaced $x_{i}^{2}.$
Using this equilibrium value in the dynamics for $ a_{t}$ in \eqref{2.12a} we obtain
\begin{equation*}
a_{t}=a_{t-1}+\frac{1/r}{t}\left( y_{t}-a_{t-1}\right) ,
\end{equation*}%
which is just \eqref{eq:recursion a_t general} with $\gamma_t = \widetilde{\gamma }/t$ and $\widetilde \gamma =1/r $. Note, however, that in order to determine $\widetilde{\gamma }$ and, correspondingly, the value of
$\widetilde{c}=\left( 1-\widetilde{\gamma }\right) \beta ,$ one has to know $r.$ This, however, is given by
\begin{equation*}
r=\lim_{t\rightarrow \infty }\frac{1}{t^{\gamma }}\sum_{i=1}^{t}\frac{\theta
_{i}}{i^{1-\gamma }},
\end{equation*}%
cf.\ \eqref{v and w case a} and \eqref{case a}. Since $\theta _{i}\rightarrow 1,$ it is clear that
\begin{equation*}
r=\lim_{t\rightarrow \infty }\frac{1}{t^{\gamma }}\sum_{i=1}^{t}\frac{1}{%
i^{1-\gamma }}=\lim_{t\rightarrow \infty }\frac{1}{t^{\gamma }}\left[
\int_{1}^{t}\frac{ds}{s^{1-\gamma }}+O(1)\right] =\frac{1}{\gamma }.
\end{equation*}%
Hence, $\widetilde{\gamma }=\gamma $ and $\widetilde{c}=c.$ As a consequence, up to a change in variance, we have the same asymptotics for $a_{t}$ as for $ x_{t}=1.$ The same is true for the EEP.



\section{Consistency}
\label{sec:consist}

The weak consistency of the OLS estimator in Sections \ref{sc:CGlearning} and \ref{sec:decr-gain-learn} is obtained as a byproduct of our results in Theorems \ref{thEEPconst} and \ref{th4}. It is instructive, however, to look at
our results in the light of the results available in the literature on consistency in models with predetermined regressors. The reason is that even the best of those conditions turn out not to be met by some of the constant and
decreasing gain learning models we consider in this paper. This finding complements the failure of the Grenander condition for the decreasing gain model in Section \ref{sec:decr-gain-learn}, see also the discussion in the
introduction.

To our knowledge, the best sufficient condition for the consistency of the OLS estimator in multivariate models with predetermined regressors is given in \citet{LaiWei82a}.  It requires that
\begin{equation}
\lambda _{\min }(T)\rightarrow \infty \text{ \ and \ }\frac{\ln \lambda_{\max }(T)}{\lambda _{\min }(T)}\rightarrow 0\text{ \ a.s.,}  \label{7-1}
\end{equation}%
where $\lambda _{\max }(T)$ and $\lambda _{\min }(T)$ are the maximal and minimal eigenvalue, respectively, of the regressors' moment matrix $M_{T}$.  For the estimation of the slope parameter in a simple regression model, a
slight improvement is given in \citet{LaiWei82b} with the condition
\begin{equation}
\frac{A_{T}}{\ln T}\rightarrow \infty \text{ \ a.s.,}  \label{7-2}
\end{equation}%
with $A_{T}$ being  the usual sum of squared mean-adjusted regressors. To illustrate the strength of \eqref{7-1}, \citet{LaiWei82a} discuss an example in which a marginal violation of the conditions leads to the inconsistency of
the OLS estimator. They hence call the conditions in \eqref{7-1} ``in some sense the weakest possible'' (p.\ 155).

For the purpose of comparing \eqref{7-1} to our results on weak consistency, note that this condition may also be used in terms of convergence in probability, in the sense that
\begin{equation}
\frac{\ln \lambda _{\max }(T)}{\lambda _{\min }(T)}\overset{p}{\rightarrow } 0  \label{7-3}
\end{equation}%
implies the weak consistency of the OLS estimator, say $\widehat{\theta }_{T}.$ This is because the basic result obtained by \citet{LaiWei82a} is that
\begin{equation*}
\left \Vert \widehat{\theta }_{T}-\theta \right \Vert ^{2}=\frac{\ln \lambda
_{\max }(T)}{\lambda _{\min }(T)}O(1)  \text{ \ a.s.}
\end{equation*}%
on the set $\left \{ \lambda _{\min }(T)>0\right \} .$ Let us briefly discuss condition (\ref{7-3}) for the various models considered in this paper.



\subsection{Constant gain}

Reconsider the model in \eqref{eq:y_CG}-\eqref{eq:a_CGall}. For the stable case, (\ref{7-3}) is trivially satisfied since all entries of $M_{T}$ in \eqref{M_OLS} satisfy a weak LLN. The same is true for the unit root case, as can be
shown by some straightforward calculations on the eigenvalues, using the asymptotic behaviour of the properly normalised entries of $M_{T}$ as obtained in Appendix \ref{appCGbetahatc=0}. For the explosive case, similar
calculations making use of Theorem \ref{thIEPconst} \emph{(iii)} show that
\begin{equation*}
\frac{\ln \lambda _{\max }(T)}{\lambda _{\min }(T)}\rightarrow 4\ln \abs{1-c} \text{ \ a.s.}.
\end{equation*}%
Hence (\ref{7-3}) is violated, but weak consistency still holds.



\subsection{Decreasing gain}

Turn now to model \eqref{eq:finalmodel1}-\eqref{eq:finalmodel2}. For $c<1/2,$ it can be verified that condition (\ref{7-3}) is met. For $c>1/2$, however, it is shown in Appendix \ref{apProofTh3} that
\begin{equation*}
\plim_{T\rightarrow \infty }\frac{A_{T}}{\ln T}=\frac{\sigma
^{2}\gamma ^{2}}{2c-1}.
\end{equation*}%
Hence (\ref{7-2}) is not satisfied. Also, \citet{ChristopeitMassmann13c} conclude that
\begin{equation*}
\plim_{T\rightarrow \infty }\frac{\ln \lambda _{\max }(T)}{\lambda
_{\min }(T)}=\left( \alpha ^{2}+1\right) \frac{2c-1}{\sigma ^{2}\gamma ^{2}}
\end{equation*}%
so that (\ref{7-3}) is not satisfied either. Nevertheless, Theorem \ref{th4} implies that the slope estimator is weakly consistent. 



\section{The Lindeberg conditions}
\label{sec:lindeberg-condition}

\subsection{Theorem \ref{th2} for \texorpdfstring{$c\geq 1/2$}{c>=1/2}}
\label{app:LCth3}

We verify the Lindeberg condition for sums of independent random variables, cf.\ \citet[Chapter III, \S 4, Theorem 1]{Shiryaev96}. Put differently, for every $\delta >0,$%
\begin{equation*}
V_{t}=\frac{1}{\left \langle v\right \rangle _{t}}\sum_{i=1}^{t}\E %
\frac{\varepsilon _{i}^{2}}{i^{2\left( 1-c\right) }}1_{\left \{ \left \vert
\varepsilon _{i}\right \vert >\delta i^{1-c}\left \langle v\right \rangle
_{t}^{1/2}\right \} }\rightarrow 0.
\end{equation*}%
For $c>1/2,$ taking account of (\ref{bracket c>1/2}),
\begin{eqnarray*}
\left \{ \left \vert \varepsilon _{i}\right \vert >\delta i^{1-c}\left \langle
v\right \rangle _{t}^{1/2}\right \}  &=&\left \{ \left \vert \varepsilon
_{i}\right \vert >\frac{\sigma }{\sqrt{2c-1}}\delta i^{1-c}\sqrt{t^{2c-1}+O(1)%
}\right \}  \\
&=&\left \{ \left \vert \varepsilon _{i}\right \vert >\frac{\sigma }{\sqrt{2c-1}%
}\left( 1+o(1\right) )\delta i^{1-c}t^{c-1/2}\right \}  \\
&\subset &\left \{ \left \vert \varepsilon _{i}\right \vert >\kappa \left(
1+o(1\right) )t^{p}\right \}
\end{eqnarray*}%
with $p=\left( c\wedge 1\right) -\frac{1}{2}$ and $\kappa >0.$ The last inclusion follows from the fact that $i^{1-c}\geq t^{1-c\text{ }}$ for $ c\geq 1$ and $i^{1-c}\geq 1$ for $c<1.$ Therefore, by square integrability of
$\varepsilon _{i},$
\begin{equation*}
\E \varepsilon _{i}^{2}1_{\left \{ \left \vert \varepsilon
_{i}\right \vert >\delta i^{1-c}\left \langle v\right \rangle
_{t}^{1/2}\right \} }\leq \E \varepsilon _{1}^{2}1_{\left \{ \left \vert
\varepsilon _{1}\right \vert >\kappa \left( 1+o(1\right) )t^{p}\right \} }=\pi
_{t}\rightarrow 0
\end{equation*}%
as $t\rightarrow \infty .$ As a consequence,%
\begin{equation*}
V_{t}\leq \frac{\pi _{t}}{\left \langle v\right \rangle _{t}}\sum_{i=1}^{t}%
\frac{1}{i^{2\left( 1-c\right) }}=\frac{\pi _{t}}{\sigma ^{2}}\rightarrow 0.
\end{equation*}

For $c=1/2,$ the proof runs similarly, now making use of (\ref{bracket c=1/2}):
\begin{eqnarray*}
\left \{ \left \vert \varepsilon _{i}\right \vert >\delta i^{1/2}\left
\langle v\right \rangle _{t}^{1/2}\right \} &=&\left \{ \left \vert
\varepsilon _{i}\right \vert >\sigma \left( 1+o(1\right) )\delta i^{1/2}%
\sqrt{\ln t}\right \} \\
&\subset &\left \{ \left \vert \varepsilon _{i}\right \vert >\kappa \left(
1+o(1\right) )\sqrt{\ln t}\right \} ,
\end{eqnarray*}%
so that
\begin{equation*}
\E \varepsilon _{i}^{2}1_{\left \{ \left \vert \varepsilon _{i}\right
\vert >\delta i^{1-c}\left \langle v\right \rangle _{t}^{1/2}\right \} }\leq
\E \varepsilon _{1}^{2}1_{\left \{ \left \vert \varepsilon _{1}\right
\vert >\kappa \left( 1+o(1\right) )\sqrt{\ln t}\right \} }=\pi
_{t}\rightarrow 0
\end{equation*}%
and hence%
\begin{equation*}
V_{t}=\frac{1}{\left \langle v\right \rangle _{t}}\sum_{i=1}^{t}\E %
\frac{\varepsilon _{i}^{2}}{i}1_{\left \{ \left \vert \varepsilon _{i}\right
\vert >\delta i^{1/2}\left \langle v\right \rangle _{t}^{1/2}\right \} }\leq
\frac{\pi _{t}}{\left \langle v\right \rangle _{t}}\sum_{i=1}^{t}\frac{1}{i}=%
\frac{\pi _{t}}{\sigma ^{2}}\rightarrow 0.
\end{equation*}


\subsection{Theorem \ref{th4} for \texorpdfstring{$c>1/2$}{c>1/2}}
\label{app:LCth4i}


Reconsider the martingale in (\ref{M}), reproduced here for convenience:%
\begin{equation*}
M_{T}=\sum_{t=1}^{T}\xi _{Tt}\varepsilon _{t},\text{ \ }\xi _{Tt}=\frac{%
a_{t-1}}{\sqrt{\alpha _{T}}}.
\end{equation*}%
We have to show that, for every $\delta >0,$%
\begin{equation}
R_{T}=\sum_{t=1}^{T}\E \left \{ \xi _{Tt}^{2}\varepsilon
_{t}^{2}1_{\left \{ \left \vert \xi _{Tt}\varepsilon _{t}\right \vert >\delta
\right \} }|\mathcal{F}_{t-1}\right \} \overset{p}{\rightarrow }0,  \label{LC}
\end{equation}%
cf.\ \citet{ChristopeitHoderlein06}.  To this end, we make use of the elementary implication $\left \vert ab\right \vert >\delta \Rightarrow a^{2}>\delta $ or $b^{2}>\delta $ to obtain the inclusion $\left \{ \left \vert \xi
    _{Tt}\varepsilon _{t}\right \vert >\delta \right \} =\left \{ \left \vert a_{t-1}\varepsilon _{t}\right \vert >\delta \sqrt{\alpha _{T}}\right \} \subset \left \{ a_{t-1}^{2}>\delta \sqrt{\alpha _{T}}\right \} \cup \left \{
  \varepsilon _{t}^{2}>\delta \sqrt{\alpha _{T}}\right \} .$ Therefore,%
\begin{eqnarray*}
R_{T} &\leq &\frac{1}{\alpha _{T}}\sum_{t=1}^{T}\E \left \{
a_{t-1}^{2}\varepsilon _{t}^{2}1_{\left \{ a_{t-1}^{2}>\delta \sqrt{\alpha
_{T}}\right \} }|\mathcal{F}_{t-1}\right \}  \\
&&\text{ \  \  \  \  \  \  \ }+\frac{1}{\alpha _{T}}\sum_{t=1}^{T}\E %
\left \{ a_{t-1}^{2}\varepsilon _{t}^{2}1_{\left \{ \varepsilon
_{t}^{2}>\delta \sqrt{\alpha _{T}}\right \} }|\mathcal{F}_{t-1}\right \}  \\
&=&\frac{\sigma ^{2}}{\alpha _{T}}\sum_{t=1}^{T}a_{t-1}^{2}1_{\left \{
a_{t-1}^{2}>\delta \sqrt{\alpha _{T}}\right \} }+\frac{1}{\alpha _{T}}%
\sum_{t=1}^{T}a_{t-1}^{2}\E \left \{ \varepsilon _{t}^{2}1_{\left \{
\varepsilon _{t}^{2}>\delta \sqrt{\alpha _{T}}\right \} }\right \}  \\
&=&R_{T}^{0}+R_{T}^{1}.
\end{eqnarray*}%
As to $R_{T}^{0}$, since $a_{t}\rightarrow \alpha $ a.s.$,$ there will be a $ T_{0\text{ }}$(depending on $\omega $) such that $a_{t-1}^{2}\leq \delta \sqrt{\alpha _{T}}$ for all $t>T_{0}.$ Hence the sum is finite and
\begin{equation}
R_{T}^{0}\rightarrow 0\text{ \ a.s.}  \label{asy R_0}
\end{equation}%
As to $R_{T}^{1},$%
\begin{equation*}
\E \left \{ \varepsilon _{t}^{2}1_{\left \{ \varepsilon
_{t}^{2}>\lambda _{T}\delta \right \} }\right \} =\pi _{T}\rightarrow 0.
\end{equation*}%
Hence, taking account of (\ref{rate of A_prime}),
\begin{equation}
R_{T}^{1}=\frac{\pi _{T}}{\alpha _{T}}\sum_{t=1}^{T}a_{t-1}^{2}\mathbf{=}\pi
_{T}\frac{A_{T}^{\prime }}{\alpha _{T}}\overset{p}{\rightarrow }0.
\label{asy R_1}
\end{equation}
(\ref{asy R_0}) and (\ref{asy R_1}) together show (\ref{LC}).


\subsection{Theorem \ref{th4} for \texorpdfstring{$c<1/2$}{c<1/2}}
\label{app:LCth4ii}

By definition (cf. (\ref{X})),%
\begin{equation*}
X_{T}=\sum_{t=1}^{T}\xi _{Tt}\varepsilon _{t}
\end{equation*}%
with%
\begin{equation*}
\xi _{Tt}=\frac{1}{T^{1/2-c}}\left( t^{-c}-\frac{T^{-c}}{1-c}\right)
\end{equation*}%
To show:%
\begin{equation*}
R_{T}=\sum_{t=1}^{T}\E \left \{ \xi _{Tt}^{2}\varepsilon
_{t}^{2}1_{\left \{ \left \vert \xi _{Tt}\varepsilon _{t}\right \vert >\delta
\right \} }|\mathcal{F}_{t-1}\right \} \overset{p}{\rightarrow }0.
\end{equation*}%
But%
\begin{equation*}
R_{T}=\sum_{t=1}^{T}\xi _{Tt}^{2}\E \left \{ \varepsilon
_{t}^{2}1_{\left \{ \varepsilon _{t}^{2}>\delta ^{2}/\xi _{Tt}^{2}\right \}
}\right \} .
\end{equation*}%
Since
\begin{equation*}
\max_{t\leq T}\left \vert \xi _{Tt}\right \vert \leq \frac{1}{T^{1/2-c}}+\frac{%
1}{1-c}\frac{1}{T^{1/2}}=m_{T}=o(1),
\end{equation*}%
it follows that%
\begin{equation*}
\pi _{T}=\E \left \{ \varepsilon _{t}^{2}1_{\left \{ \varepsilon
_{t}^{2}>\delta ^{2}/\xi _{Tt}^{2}\right \} }\right \} \leq \E \left \{
\varepsilon _{t}^{2}1_{\left \{ \varepsilon _{t}^{2}>\delta
^{2}/m_{T}^{2}\right \} }\right \} \rightarrow 0.
\end{equation*}%
Therefore,%
\begin{equation*}
R_{T}\leq m_{T}\sum_{t=1}^{T}\xi _{Tt}^{2}\rightarrow 0
\end{equation*}%
since $\sum_{t=1}^{T}\xi _{Tt}^{2}=O(1).$ 



\section{Proof of Corollary \ref{cr2}\label{appSig}}

Consider the OLS residual $\widehat{\varepsilon }_{t}=y_{t}-\widehat{\delta }-\widehat{\beta }a_{t-1}=m_{t}+\varepsilon _{t},$ where
\begin{equation*}
m_{t}=(\delta -\widehat{\delta })+(\beta -\widehat{\beta })a_{t-1}.
\end{equation*}%
Then
\begin{equation*}
\sum_{t=1}^{T}\widehat{\varepsilon }_{t}^{2}=\sum_{t=1}^{T}m_{t}^{2}+2%
\sum_{t=1}^{T}m_{t}\varepsilon _{t}+\sum_{t=1}^{T}\varepsilon _{t}^{2}.
\end{equation*}%
Since
\begin{eqnarray*}
\frac{1}{T}\sum_{t=1}^{T}m_{t}^{2} &\leq &\frac{2}{T}\left[ T(\delta -%
\widehat{\delta })^{2}+(\beta -\widehat{\beta })^{2}\sum_{t=1}^{T}a_{t-1}^{2}%
\right] =o(1), \\
\frac{1}{T}\left \vert \sum_{t=1}^{T}m_{t}\varepsilon _{t}\right \vert &\leq
&\left[ \frac{1}{T}\sum_{t=1}^{T}m_{t}^{2}\frac{1}{T}\sum_{t=1}^{T}%
\varepsilon _{t}^{2}\right] ^{1/2}=o(1),
\end{eqnarray*}%1
it follows that%
\begin{equation*}
\frac{1}{T}\sum_{t=1}^{T}\widehat{\varepsilon }_{t}^{2}=\frac{1}{T}%
\sum_{t=1}^{T}\varepsilon _{t}^{2}+o(1)\rightarrow \sigma ^{2}
\end{equation*}%
with probability one or in probability according to whether both $\widehat{\delta }$ and $\widehat{\beta }$ are strongly or weakly consistent.


\section{Proof of equation \eqref{asy R & S} }
\label{apProofRS}

\paragraph{Ad $R$.}

Recall the definition of $R_T$ in \eqref{R & S new}:
\begin{equation*}
  R_{T}=\frac{1}{T^{1/2-c}}\sum_{t=1}^{T}t^{-c}\zeta _{t}\varepsilon _{t}.
\end{equation*}
With a view to deriving $\E  R^2_T $, we calculate%
\begin{eqnarray}
\E \left[ \sum_{t=1}^{T}t^{-c}\zeta _{t}\varepsilon _{t}\right] ^{2}
&=&\E \sum_{s,t=1}^{T}t^{-c}\zeta _{t}\varepsilon _{t}s^{-c}\zeta
_{s}\varepsilon _{s}  \notag \\
&=&2\E \sum_{t=1}^{T}t^{-c}\zeta _{t}\varepsilon
_{t}\sum_{s=1}^{t-1}s^{-c}\zeta _{s}\varepsilon _{s}+\E %
\sum_{t=1}^{T}t^{-2c}\zeta _{t}^{2}\varepsilon _{t}^{2}  \notag \\
&=&2\sum_{t=1}^{T}\sum_{s=1}^{t-1}t^{-c}s^{-c}\E \zeta
_{t}\varepsilon _{t}\zeta _{s}\varepsilon _{s}+\E %
\sum_{t=1}^{T}t^{-2c}\zeta _{t}^{2}\varepsilon _{t}^{2}  \notag \\
&=&R_{1T}+R_{2T}.  \label{deco R}
\end{eqnarray}%
As to $R_{1T}$, making use of (\ref{zeta}), we obtain for $s<t$ that%
\begin{eqnarray*}
\E \zeta _{t}\varepsilon _{t}\zeta _{s}\varepsilon _{s} &=&\E %
\left \{ \left[ \sum_{i=t}^{\infty }\theta _{i}\frac{\varepsilon _{i}}{%
i^{1-c}}\right] \varepsilon _{t}\left[ \sum_{i=s}^{\infty }\theta _{i}\frac{%
\varepsilon _{i}}{i^{1-c}}\right] \varepsilon _{s}\right \} \\
&=&\E \left \{ \left[ \sum_{i=t}^{\infty }\theta _{i}\frac{%
\varepsilon _{i}}{i^{1-c}}\right] ^{2}\varepsilon _{t}\varepsilon _{s}\right
\} \\
&&+\E \left \{ \left[ \sum_{i=t}^{\infty }\theta _{i}\frac{%
\varepsilon _{i}}{i^{1-c}}\right] \varepsilon _{t}\left[ \sum_{i=s}^{t-1}%
\theta _{i}\frac{\varepsilon _{i}}{i^{1-c}}\right] \varepsilon _{s}\right \}
\\
&=&\E \left \{ \left[ \sum_{i=t}^{\infty }\theta _{i}\frac{%
\varepsilon _{i}}{i^{1-c}}\right] \varepsilon _{t}\right \} \E \left
\{ \left[ \sum_{i=s}^{t-1}\theta _{i}\frac{\varepsilon _{i}}{i^{1-c}}\right]
\varepsilon _{s}\right \} \\
&=&\sigma ^{4}\theta _{t}\theta _{s}\frac{1}{t^{1-c}}\frac{1}{s^{1-c}}.
\end{eqnarray*}%
Hence, remembering that $\lim_{t\rightarrow \infty }\theta _{t}=1,$%
\begin{eqnarray}
R_{1T} &=&2\sigma ^{4}\sum_{t=1}^{T}t^{-c}\theta _{t}\frac{1}{t^{1-c}}%
\sum_{s=1}^{t-1}s^{-c}\theta _{s}\frac{1}{s^{1-c}}  \notag \\
&=&2\sigma ^{4}\sum_{t=1}^{T}\theta _{t}\frac{1}{t}\sum_{s=1}^{t-1}\theta
_{s}\frac{1}{s}  \notag \\
&=&O\left( 1\right) \sum_{t=1}^{T}\frac{1}{t}\left[ \ln t+O\left( 1\right) %
\right]  \notag \\
&=&O\left( 1\right) \ln ^{2}T.  \label{R1}
\end{eqnarray}%
As to $R_{2T},$%
\begin{eqnarray*}
\E \zeta _{t}^{2}\varepsilon _{t}^{2} &=&\E \left[ \zeta
_{t+1}+\theta _{t}\frac{\varepsilon _{t}}{t^{1-c}}\right] ^{2}\varepsilon
_{t}^{2} \\
&=&\E \zeta _{t+1}^{2}\E \varepsilon _{t}^{2}+\frac{\theta
_{t}^{2}}{t^{2\left( 1-c\right) }}\E \varepsilon _{t}^{4} \\
&=&\sigma ^{4}\sum_{i=t+1}^{\infty }\frac{\theta _{i}^{2}}{t^{2\left(
1-c\right) }}+\frac{\theta _{t}^{2}}{t^{2\left( 1-c\right) }}m_{4} \\
&=&O\left( t^{2c-1}\right) .
\end{eqnarray*}%
Hence%
\begin{equation}
R_{2T}=\sum_{t=1}^{T}t^{-2c}\E \zeta _{t}^{2}\varepsilon
_{t}^{2}=O\left( 1\right) \sum_{t=1}^{T}\frac{1}{t}=O\left( 1\right) \ln T.
\label{R2}
\end{equation}%
Therefore, taking (\ref{deco R}), (\ref{R1}) and (\ref{R2}) together, we find that%
\begin{equation*}
\E \left( R_{T}\right) ^{2}=\frac{1}{T^{1-2c}}\left[ R_{1T}+R_{2T}%
\right] =O\left( \frac{\ln ^{2}T}{T^{1-2c}}\right) . 
\end{equation*}%
In particular,%
\begin{equation}
\plim_{T\rightarrow \infty }R_{T}=0.  \label{asy R kacke}
\end{equation}

\paragraph{Ad $S$.}

Recall $S_T$ in \eqref{R & S new}, namely
\begin{equation*}
S_{T}=\frac{1}{T^{1/2-c}}\sum_{t=1}^{T}t^{-c}w_{t-1}\varepsilon _{t}. 
\end{equation*}%
Since 
\begin{eqnarray}
w_{t-1} &=&\frac{1}{t}\sum_{i=1}^{t-1}\frac{O_{ti}\left( 1\right) }{i^{1-c}}%
\varepsilon _{i},  \label{w} \\
\E w_{t-1}^{2} &=&O\left( 1\right) \frac{1}{t^{2}}\sum_{i=1}^{t}\frac{%
1}{i^{2\left( 1-c\right) }}=O\left( t^{-2}\right) ,
\end{eqnarray}%
(cf. (\ref{v & w})) is $\mathcal{F}_{t-1}$-measurable and%
\begin{eqnarray*}
\E S_{T}^{2} &=&\frac{\sigma ^{2}}{T^{1-2c}}\sum_{t=1}^{T}t^{-2c}%
\E w_{t-1}^{2} \\
&=&O(1)\frac{1}{T^{1-2c}}\sum_{t=1}^{T}\frac{1}{t^{2\left( 1+c\right) }}%
=O\left( \frac{1}{T^{1-2c}}\right) .
\end{eqnarray*}%
In particular,%
\begin{equation}
\plim_{T\rightarrow \infty }S_{T}=0.  \label{asy S}
\end{equation}%

\section{On Remark \ref{rm:contDF}}
\label{apOnRem1}

The following considerations are based on a Theorem by L\'{e}vy, cf.\ \citet[Theorem 13.1.1]{Kawata72}:

\begin{theorem}[L\'{e}vy]
  Let $X_{n},n=0,1,\ldots ,$ be a
sequence of random variables with distribution functions $F_{k}$ s.t.\ the infinite sum
\begin{equation}
X=\sum_{n=0}^{\infty }X_{n}  \label{sum}
\end{equation}%
converges absolutely with probability one. Let $p_{n}$ denote the maximal jump of $F_{n}$, i.e.\ $p_{n}=\sup \left \{ F_{n}(x)-F_{n}(x-):x\in \mathbb{R}\right \} $, and $F$ the distribution function of $X.$\ Then the following is
true.
\begin{enumerate}[label=(\roman*)]
\item  If one of the $F_{n}$  is continuous, then also the distribution function $F$ is continuous.
\item If all $F_{n}$ have discontinuities, then a necessary and sufficient condition for $F$ to be continuous is that
\begin{equation*}
P=\prod \nolimits_{n=0}^{\infty }p_{n}
\end{equation*}%
diverges to zero.
\end{enumerate}

\end{theorem}

We will apply this result to the sequence $X_{n}=a_{n}\varepsilon _{n},$ where the $\varepsilon _{n}$ are iid with finite second moments and distribution function $F_{0},$ and the sequence $\left( a_{n}\right) \in l^{2}.$ The rhs
of (\ref{sum}) then converges a.s. (by Kolmogorov's theorem) and in $L^{2}.$ If $F_{0}$ is continuous, then so are the distribution functions $F_{n}\left( x\right) =F_{0}\left( a^{-n}x\right) $ of the $X_{n}$, and \emph{(i)} shows
that $F$ is continuous. If the points of discontinuity of $F_{0}$ are $ \left( x_{i}\right) ,$ with $p_{0}=\sup_{i}\left \{ F_{0}(x_{i})-F_{0}(x_{i}-)\right \} ,$ then the points of discontinuity of $ F_{n}$ are $\left(
  a^{-n}x_{i}\right) ,$ and the height of the jump of $ F_{n} $ at $a^{-n}x_{i}$ is just that of $F_{0}$ at $x_{i}.$ Hence $ p_{n}=p_{0}=\sup_{i}\left \{ F_{0}(x_{i})-F_{0}(x_{i}-)\right \} .$ Hence, whenever $p_{0}<1,$%
\begin{equation*}
\prod \nolimits_{n=0}^{N}p_{n}=p_{0}^{N+1}\rightarrow 0,
\end{equation*}%
so that continuity of $F$ follows from \emph{(ii)}. The case $p_{0}=1$
cannot occur unless $\varepsilon _{n}=0.$

This result covers all our needs for stable AR(1)-processes with \emph{iid integrable error terms}. It shows that the stationary distribution $F$ is \emph{always continuous, }except for the trivial case of zero errors (in which
case the stationary solution is zero). Cf. also \citet[Lemma 2]{LaiWei85}.


\section{On Remark \ref{rm:wv}}
\label{apOnRem7}

As to $v_{t},$ the predictable quadratic variation is%
\begin{equation*}
\left \langle v\right \rangle _{t}=\sigma ^{2}\sum_{i=1}^{t}\frac{\theta
_{i}^{2}}{i^{2\left( 1-c\right) }}=O\left( 1\right) \text{ \  \emph{a.s..}}
\end{equation*}%
Therefore, by the MCT for martingales with bounded predictable quadratic variation, $v_{t}$ converges a.s. to some finite random variable.

As to $w_{t},$ for every $\delta >0,$%
\begin{equation*}
\mathbf{P}\left( \left \vert w_{t}\right \vert >\delta \right) \leq \delta
^{-2}\E w_{t}^{2}=O\left( t^{2c-3}\right) ,
\end{equation*}%
so that%
\begin{equation*}
\sum_{t=1}^{\infty }\mathbf{P}\left( \left \vert w_{t}\right \vert >\delta
\right) <\infty .
\end{equation*}%
Therefore, by the Borel-Cantelli Lemma,%
\begin{equation*}
\mathbf{P}\left( \left \vert w_{t}\right \vert >\delta \text{ \  \emph{i.o.}}%
\right) =0.
\end{equation*}%
Equivalently, $\lim_{t\rightarrow \infty }w_{t}=0$ a.s..


\bibliographystyle{agsm}
\bibliography{ChristopeitMassmann.bib}

\end{document}