\documentclass[letterpaper,12pt]{article}
\usepackage{amsfonts}
\usepackage{amsmath, amsthm, bbm,astron}
\usepackage{xcolor}
\usepackage{xr} %for external documents labels
\usepackage{multirow}
\usepackage{float}
\floatstyle{plaintop}
\restylefloat{table}
\usepackage{endnotes}
\let\footnote=\endnote
%%%%%%%
%%%%%%% Define labels from main text:
\newlabel{sec:model}{{2}{7}}
\newlabel{model0}{{1}{7}}
\newlabel{ass:id}{{ID}{9}}
\newlabel{th:id}{{2.1}{10}}
\newlabel{sec:estimator}{{3}{11}}
\newlabel{DefCalL}{{2}{11}}
\newlabel{DefQMLE}{{3}{11}}
\newlabel{LNT123}{{4}{11}}
\newlabel{ass:A1}{{1}{12}}
\newlabel{ass:A2}{{2}{12}}
\newlabel{ass:A3}{{3}{12}}
\newlabel{ass:A4}{{4}{12}}
\newlabel{th:consistency}{{3.1}{13}}
\newlabel{sec:limdist}{{4}{14}}
\newlabel{th:ass_expand}{{4.1}{16}}
\newlabel{cor:limit}{{4.2}{16}}
\newlabel{ass:A5}{{5}{17}}
\newlabel{VAR}{{5}{19}}
\newlabel{ass:A6}{{6}{20}}
\newlabel{th:limdis}{{4.3}{20}}
\newlabel{sec:BiasCorrection}{{4.3}{21}}
\newlabel{def:estimators}{{1}{21}}
\newlabel{ass:bc}{{7}{22}}
\newlabel{th:biascorrection}{{4.4}{22}}
\newlabel{corr:biascorrected}{{4.5}{23}}
\newlabel{sec:testing}{{5}{24}}
\newlabel{DefBtilde}{{6}{24}}
\newlabel{DefWDs}{{7}{25}}
\newlabel{DefLRs}{{8}{25}}
\newlabel{EquivGrads}{{9}{26}}
\newlabel{th:gradient}{{5.1}{27}}
\newlabel{limNablaL}{{10}{27}}
\newlabel{DefLMs}{{11}{27}}
\newlabel{th:testing}{{5.2}{28}}
\newlabel{sec:Endogenous Regression}{{6}{28}}
\newlabel{sec:MC}{{7}{30}}
\newlabel{sec:conclusion}{{8}{33}}
\newlabel{app:consistency}{{A}{38}}
\newlabel{lemma:Optimization}{{A.1}{38}}
\newlabel{op1lemma}{{A.2}{39}}
\newlabel{Bound1SNT}{{12}{40}}
\newlabel{boundS1NT}{{13}{40}}
\newlabel{app:limdis}{{B}{41}}
\newlabel{lemma:vanishing}{{B.1}{42}}
\newlabel{lemma:denCLT}{{B.2}{42}}
\newlabel{VarEqOmega}{{14}{43}}
\newlabel{tab:T1}{{1}{45}}
\newlabel{tab:T2}{{2}{46}}
\newlabel{tab:T3}{{3}{47}}
\newlabel{tab:T4}{{4}{47}}
\newlabel{tab:T5}{{5}{48}}
\newlabel{tab:T6}{{6}{49}}
\newlabel{tab:T7}{{7}{50}}
\newlabel{tab:T8}{{8}{51}}
%%%%%%%
%%%%%%%
\renewcommand{\thesection}{S.\arabic{section}}
\renewcommand{\thetable}{S.\arabic{table}}
\renewcommand{\baselinestretch}{1.5}
\addtolength{\textwidth}{34mm}
\addtolength{\oddsidemargin}{-17mm}
\addtolength{\textheight}{34mm}
\addtolength{\topmargin}{-17mm}
\numberwithin{equation}{section}
\newtheorem{assumption}{Assumption}
\newtheorem{example}{Example}
\newtheorem{theorem}{Theorem}[section]
\newtheorem{acknowledgement}[theorem]{Acknowledgement}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{lemma}[theorem]{Lemma}
\input{tcilatex}
\newcommand{\Tr}{{\rm Tr}}
\begin{document}
\title{ {\bf Supplementary Material} \\[20pt] Dynamic Linear Panel Regression Models \\ with Interactive Fixed Effects }
\author{\setcounter{footnote}{2} Hyungsik Roger Moon\footnote{ Department of Economics and USC Dornsife INET, University of Southern California, Los Angeles, CA 90089-0253. Email: {\tt moonr@usc.edu}. Department of Economics, Yonsei University, Seoul, Korea. } \and Martin Weidner\footnote{ Department of Economics, University College London, Gower Street, London WC1E~6BT, U.K., and CeMMaP. Email: {\tt m.weidner@ucl.ac.uk}.
}}
\date{\today}
\maketitle
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Proof of Identification (Theorem~\ref{th:id})}
\label{app:identification}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{proof}[\bf Proof of Theorem~\ref{th:id}]
Let $Q(\beta,\lambda,f) \equiv \mathbb{E}\left(\left\| Y \, - \, \beta \cdot X \, - \, \lambda \, f' \right\|^2_F \Big| \lambda^0, f^0, w \right)$, where $\beta \in \mathbb{R}^K$, $\lambda \in \mathbb{R}^{N\times R}$ and $f \in \mathbb{R}^{T\times R}$. We have
\begin{align*}
& Q(\beta,\lambda,f) \nonumber \\
&= \mathbb{E} \left\{ \Tr \left[ \left( Y \, - \, \beta \cdot X \, - \, \lambda \, f' \right)' \left( Y \, - \, \beta \cdot X \, - \, \lambda \, f' \right) \right] \Big| \lambda^0, f^0, w \right\} \nonumber \\
&= \mathbb{E} \left\{ \Tr \left[ \left( \lambda^0 f^{0 \prime} - \lambda f' - (\beta-\beta^0) \cdot X + e \right)' \left( \lambda^0 f^{0 \prime} - \lambda f' - (\beta-\beta^0) \cdot X + e \right) \right] \Big| \lambda^0, f^0, w \right\} \nonumber \\
&= \mathbb{E} \left[ \Tr \left( e' e \right) \Big| \lambda^0, f^0, w \right] \nonumber \\
& \qquad + \underbrace{ \mathbb{E} \left\{ \Tr \left[ \left( \lambda^0 f^{0 \prime} - \lambda f' - (\beta-\beta^0) \cdot X \right)' \left( \lambda^0 f^{0 \prime} - \lambda f' - (\beta-\beta^0) \cdot X \right) \right] \Big| \lambda^0, f^0, w \right\} }_{ \equiv Q^*(\beta,\lambda,f) } .
\end{align*}
In the last step we used Assumption~\ref{ass:id}$(ii)$, which guarantees that the cross terms involving $e$ vanish in expectation. Because $ \mathbb{E} \left[ \Tr \left( e' e \right) \Big| \lambda^0, f^0, w \right]$ is independent of $\beta$, $\lambda$ and $f$, we find that minimizing $Q(\beta,\lambda,f)$ is equivalent to minimizing $Q^*(\beta,\lambda,f)$.
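As a purely numerical aside (not part of the proof), this decomposition of the expected objective into $\mathbb{E}[\Tr(e'e)|\cdot]$ plus the deterministic term $Q^*$ is easy to illustrate by Monte Carlo integration over $e$. The following Python sketch does this for a single regressor with standard normal errors; all dimensions, seeds, and parameter values are arbitrary choices for illustration only.
\begin{verbatim}
import numpy as np

# Monte Carlo illustration of Q = E Tr(e'e) + Q*: for mean-zero errors e
# independent of (lambda0, f0, X), the expected LS objective equals
# E||e||_F^2 plus the deterministic term Q*.
rng = np.random.default_rng(0)
N, T, R = 30, 20, 2
lam0 = rng.normal(size=(N, R)); f0 = rng.normal(size=(T, R))
X = rng.normal(size=(N, T))            # a single regressor (K = 1)
beta0, beta = 0.5, 0.8                 # evaluate Q away from beta0
lam = rng.normal(size=(N, R)); f = rng.normal(size=(T, R))

D = lam0 @ f0.T - lam @ f.T - (beta - beta0) * X   # deterministic part
Q_draws = [np.linalg.norm(D + rng.normal(size=(N, T)), 'fro')**2
           for _ in range(5000)]       # Y - beta X - lam f' = D + e
print(np.mean(Q_draws))                # ~ Monte Carlo estimate of Q
print(N * T + np.linalg.norm(D, 'fro')**2)   # E Tr(e'e) + Q*, should match
\end{verbatim}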
We decompose $Q^*(\beta,\lambda,f)$ as follows
\begin{align*}
& Q^*(\beta,\lambda,f) \nonumber \\
&=\mathbb{E} \left\{ \Tr \left[ \left( \lambda^0 f^{0 \prime} - \lambda f' - (\beta-\beta^0) \cdot X \right)' \left( \lambda^0 f^{0 \prime} - \lambda f' - (\beta-\beta^0) \cdot X \right) \right] \Big| \lambda^0, f^0, w \right\} \nonumber \\
&= \mathbb{E} \left\{ \Tr \left[ \left( \lambda^0 f^{0 \prime} - \lambda f' - (\beta-\beta^0) \cdot X \right)' M_{(\lambda,\lambda^0,w)} \left( \lambda^0 f^{0 \prime} - \lambda f' - (\beta-\beta^0) \cdot X \right) \right] \Big| \lambda^0, f^0, w \right\} \nonumber \\
& \quad + \mathbb{E} \left\{ \Tr \left[ \left( \lambda^0 f^{0 \prime} - \lambda f' - (\beta-\beta^0) \cdot X \right)' P_{(\lambda,\lambda^0,w)} \left( \lambda^0 f^{0 \prime} - \lambda f' - (\beta-\beta^0) \cdot X \right) \right] \Big| \lambda^0, f^0, w \right\} \nonumber \\
&= \underbrace{ \mathbb{E} \left\{ \Tr \left[ \left( (\beta^{\rm high}-\beta^{0, {\rm high}}) \cdot X_{\rm high} \right)' M_{(\lambda,\lambda^0,w)} \left( (\beta^{\rm high}-\beta^{0, {\rm high}}) \cdot X_{\rm high} \right) \right] \Big| \lambda^0, f^0, w \right\} }_{\equiv Q^{\rm high}(\beta^{\rm high},\lambda) } \nonumber \\
& \quad + \underbrace{ \mathbb{E} \left\{ \Tr \left[ \left( \lambda^0 f^{0 \prime} - \lambda f' - (\beta-\beta^0) \cdot X \right)' P_{(\lambda,\lambda^0,w)} \left( \lambda^0 f^{0 \prime} - \lambda f' - (\beta-\beta^0) \cdot X \right) \right] \Big| \lambda^0, f^0, w \right\} }_{\equiv Q^{\rm low}(\beta,\lambda,f) } ,
\end{align*}
where $ (\beta^{\rm high}-\beta^{0, {\rm high}}) \cdot X_{\rm high} = \sum_{m=K_1+1}^K (\beta_m-\beta^0_m) X_m$. A lower bound on $ Q^{\rm high}(\beta^{\rm high},\lambda)$ is given by
\begin{align}
& Q^{\rm high}(\beta^{\rm high},\lambda) \nonumber \\
&\geq \min_{\widetilde \lambda \in \mathbb{R}^{N \times (R+R+{\rm rank}(w))}} \mathbb{E} \left\{ \Tr \left[ \left( (\beta^{\rm high}-\beta^{0, {\rm high}}) \cdot X_{\rm high} \right)' M_{\widetilde \lambda} \left( (\beta^{\rm high}-\beta^{0, {\rm high}}) \cdot X_{\rm high} \right) \right] \Big| \lambda^0, f^0, w \right\} \nonumber \\
&= \sum_{r=R+R+{\rm rank}(w)+1}^{\min(N,T)} \mu_r\left\{ \mathbb{E}\left[ \left( (\beta^{\rm high}-\beta^{0, {\rm high}}) \cdot X_{\rm high} \right) \left( (\beta^{\rm high}-\beta^{0, {\rm high}}) \cdot X_{\rm high} \right)' \Big| \lambda^0, f^0, w \right] \right\}. \label{LowerBoundQhigh}
\end{align}
Here the inequality holds because the span of $(\lambda,\lambda^0,w)$ has dimension at most $R+R+{\rm rank}(w)$, and the equality follows from Lemma~\ref{lemma:Optimization}. Because $Q^*(\beta,\lambda,f)$, $Q^{\rm high}(\beta^{\rm high},\lambda)$, and $Q^{\rm low}(\beta,\lambda,f)$ are expectations of traces of positive semi-definite matrices, we have $Q^*(\beta,\lambda,f) \geq 0$, $Q^{\rm high}(\beta^{\rm high},\lambda) \geq 0$, and $Q^{\rm low}(\beta,\lambda,f) \geq 0$ for all $\beta$, $\lambda$, $f$. Let $\bar \beta$, $\bar \lambda$ and $\bar f$ be the parameter values that minimize $Q(\beta,\lambda,f)$, and thus also $Q^*(\beta,\lambda,f)$. Because $Q^*(\beta^0,\lambda^0,f^0)=0$ we have $Q^*(\bar \beta,\bar \lambda,\bar f) = \min_{\beta,\lambda,f} Q^*(\beta,\lambda,f) = 0$. This implies $Q^{\rm high}(\bar \beta^{\rm high},\bar \lambda) = 0$ and $Q^{\rm low}(\bar \beta,\bar \lambda,\bar f) = 0$. Assumption~\ref{ass:id}$(v)$, the lower bound \eqref{LowerBoundQhigh}, and $Q^{\rm high}(\bar \beta^{\rm high},\bar \lambda) = 0$ imply $\bar \beta^{\rm high} = \beta^{0, {\rm high}}$.
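As an aside, the eigenvalue characterization behind the lower bound \eqref{LowerBoundQhigh}, namely that minimizing the projected trace over all choices of $\widetilde\lambda$ with $q$ columns leaves exactly the sum of the smallest eigenvalues (Lemma~\ref{lemma:Optimization}), is easy to confirm numerically. The following Python sketch is purely illustrative; sizes and seed are arbitrary.
\begin{verbatim}
import numpy as np

# For a psd matrix A, min over N x q matrices lam of Tr(M_lam A) equals
# the sum of the N - q smallest eigenvalues of A, attained by letting
# lam span the top-q eigenvectors; any other lam does weakly worse.
rng = np.random.default_rng(1)
N, q = 12, 3
C = rng.normal(size=(N, N)); A = C @ C.T
vals, vecs = np.linalg.eigh(A)                # eigenvalues, ascending
M_opt = np.eye(N) - vecs[:, -q:] @ vecs[:, -q:].T
print(np.trace(M_opt @ A), vals[:-q].sum())   # equal up to rounding

lam = rng.normal(size=(N, q))                 # an arbitrary competitor
M = np.eye(N) - lam @ np.linalg.solve(lam.T @ lam, lam.T)
print(np.trace(M @ A) >= vals[:-q].sum())     # True
\end{verbatim}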
Using this, we find
\begin{align}
& Q^{\rm low}(\bar \beta,\bar \lambda,\bar f) \nonumber \\
&= \mathbb{E} \left\{ \Tr \left[ \left( \lambda^0 f^{0 \prime} - \bar \lambda \bar f' - (\bar \beta^{\rm low}-\beta^{0, {\rm low}}) \cdot X_{\rm low} \right)' \left( \lambda^0 f^{0 \prime} - \bar \lambda \bar f' - (\bar \beta^{\rm low}-\beta^{0,{\rm low}}) \cdot X_{\rm low} \right) \right] \Big| \lambda^0, f^0, w \right\} \nonumber \\
&\geq \min_f \mathbb{E} \left\{ \Tr \left[ \left( \lambda^0 f^{0 \prime} - \bar \lambda f' - (\bar \beta^{\rm low}-\beta^{0, {\rm low}}) \cdot X_{\rm low} \right)' \left( \lambda^0 f^{0 \prime} - \bar \lambda f' - (\bar \beta^{\rm low}-\beta^{0,{\rm low}}) \cdot X_{\rm low} \right) \right] \Big| \lambda^0, f^0, w \right\} \nonumber \\
&= \mathbb{E} \left\{ \Tr \left[ \left( \lambda^0 f^{0 \prime} - (\bar \beta^{\rm low}-\beta^{0, {\rm low}}) \cdot X_{\rm low} \right)' M_{\bar \lambda} \left( \lambda^0 f^{0 \prime} - (\bar \beta^{\rm low}-\beta^{0,{\rm low}}) \cdot X_{\rm low} \right) \right] \Big| \lambda^0, f^0, w \right\} , \label{QlowLowBound}
\end{align}
where $ (\bar \beta^{\rm low}-\beta^{0,{\rm low}}) \cdot X_{\rm low} = \sum_{l=1}^{K_1} (\bar \beta_l-\beta^0_l) X_l$. Because $Q^{\rm low}(\bar \beta,\bar \lambda,\bar f) = 0$ and the last expression in \eqref{QlowLowBound} is non-negative, we must have
\begin{align*}
\mathbb{E} \left\{ \Tr \left[ \left( \lambda^0 f^{0 \prime} - (\bar \beta^{\rm low}-\beta^{0, {\rm low}}) \cdot X_{\rm low} \right)' M_{\bar \lambda} \left( \lambda^0 f^{0 \prime} - (\bar \beta^{\rm low}-\beta^{0,{\rm low}}) \cdot X_{\rm low} \right) \right] \Big| \lambda^0, f^0, w \right\} &= 0.
\end{align*}
Using $M_{\bar \lambda} =M_{\bar \lambda} M_{\bar \lambda}$ and the cyclicality of the trace we obtain from the last equality
\begin{align*}
\Tr \bigg\{ M_{\bar \lambda} A M_{\bar \lambda} \bigg\} = 0,
\end{align*}
where $A =\mathbb{E} \left[ \left( \lambda^0 f^{0 \prime} - (\bar \beta^{\rm low}-\beta^{0,{\rm low}}) \cdot X_{\rm low} \right) \left( \lambda^0 f^{0 \prime} - (\bar \beta^{\rm low}-\beta^{0, {\rm low}}) \cdot X_{\rm low} \right)' \Big| \lambda^0, f^0, w \right]$. The trace of a positive semi-definite matrix is only equal to zero if the matrix itself is equal to zero, so we find
\begin{align*}
M_{\bar \lambda} A M_{\bar \lambda} &= 0 \; .
\end{align*}
This, together with the fact that $A$ itself is positive semi-definite, implies (note that $A$ positive semi-definite implies $A=CC'$ for some matrix $C$, and $M_{\bar \lambda} A M_{\bar \lambda} = 0$ then implies $M_{\bar \lambda} C = 0$, i.e., $C = P_{\bar \lambda} C$)
\begin{align*}
A &= P_{\bar \lambda} A P_{\bar \lambda} \; ,
\end{align*}
and therefore ${\rm rank}(A) \leq {\rm rank}( P_{\bar \lambda} ) \leq R$. We have thus shown
\begin{align*}
{\rm rank} \left\{ \mathbb{E} \left[ \left( \lambda^0 f^{0 \prime} - (\bar \beta^{\rm low}-\beta^{0, {\rm low}}) \cdot X_{\rm low} \right) \left( \lambda^0 f^{0 \prime} - (\bar \beta^{\rm low}-\beta^{0,{\rm low}}) \cdot X_{\rm low} \right)' \Big| \lambda^0, f^0, w \right] \right\} \leq R.
\end{align*}
We furthermore find
\begin{align*}
R &\geq {\rm rank} \left\{ \mathbb{E} \left[ \left( \lambda^0 f^{0 \prime} - (\bar \beta^{\rm low}-\beta^{0, {\rm low}}) \cdot X_{\rm low} \right) \left( \lambda^0 f^{0 \prime} - (\bar \beta^{\rm low}-\beta^{0,{\rm low}}) \cdot X_{\rm low} \right)' \Big| \lambda^0, f^0, w \right] \right\} \nonumber \\
& \geq {\rm rank} \left\{ M_{w} \mathbb{E} \left[ \left( \lambda^0 f^{0 \prime} - (\bar \beta^{\rm low}-\beta^{0, {\rm low}}) \cdot X_{\rm low} \right) P_{f^0} \left( \lambda^0 f^{0 \prime} - (\bar \beta^{\rm low}-\beta^{0,{\rm low}}) \cdot X_{\rm low} \right)' M_w \Big| \lambda^0, f^0, w \right] \right\} \nonumber \\
& \quad + {\rm rank} \left\{ P_{w} \mathbb{E} \left[ \left( \lambda^0 f^{0 \prime} - (\bar \beta^{\rm low}-\beta^{0, {\rm low}}) \cdot X_{\rm low} \right) M_{f^0} \left( \lambda^0 f^{0 \prime} - (\bar \beta^{\rm low}-\beta^{0,{\rm low}}) \cdot X_{\rm low} \right)' P_w \Big| \lambda^0, f^0, w \right] \right\} \nonumber \\
& \geq {\rm rank} \left[ M_{w} \lambda^0 f^{0 \prime} f^0 \lambda^{0 \prime} M_w \right] \nonumber \\
& \quad + {\rm rank} \left\{ \mathbb{E} \left[ \left( (\bar \beta^{\rm low}-\beta^{0, {\rm low}}) \cdot X_{\rm low} \right) M_{f^0} \left( (\bar \beta^{\rm low}-\beta^{0,{\rm low}}) \cdot X_{\rm low} \right)' \Big| \lambda^0, f^0, w \right] \right\} .
\end{align*}
Assumption~\ref{ass:id}$(iv)$ guarantees ${\rm rank} \left( M_{w} \lambda^0 f^{0 \prime} f^0 \lambda^{0 \prime} M_w \right) = {\rm rank} \left( \lambda^0 f^{0 \prime} f^0 \lambda^{0 \prime} \right) = R$. The chain of inequalities above therefore forces the rank of the second term to be zero, and because a positive semi-definite matrix of rank zero is the zero matrix, we must have
\begin{align*}
\mathbb{E} \left[ \left( (\bar \beta^{\rm low}-\beta^{0, {\rm low}}) \cdot X_{\rm low} \right) M_{f^0} \left( (\bar \beta^{\rm low}-\beta^{0,{\rm low}}) \cdot X_{\rm low} \right)' \Big| \lambda^0, f^0, w \right] =0.
\end{align*}
According to Assumption~\ref{ass:id}$(iii)$ this implies $\bar \beta^{\rm low} =\beta^{0,{\rm low}}$, i.e., we have $\bar \beta = \beta^0$. This also implies $Q^*(\bar \beta,\bar \lambda,\bar f) = \| \lambda^0 f^{0 \prime} - \bar \lambda \bar f' \|_F^2 =0$, and therefore $\bar \lambda \bar f' = \lambda^0 f^{0 \prime}$.
\end{proof}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Examples of Error Distributions}
\label{app:error}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
The following lemma provides examples of error distributions that satisfy $\|e\|={\cal O}_p(\sqrt{\max(N,T)})$ as $N,T \rightarrow \infty$. Example (i) is particularly relevant for us, because the assumptions on $e_{it}$ imposed there also appear in Assumption~\ref{ass:A5} in the main text, i.e., under the main text assumptions we indeed have $\|e\|={\cal O}_p(\sqrt{\max(N,T)})$.
\begin{lemma} \label{lemma:Enorm} For each of the following distributional assumptions on the errors $e_{it}$, $i=1,\ldots,N$, $t=1,\ldots,T$, we have $\|e\|={\cal O}_p(\sqrt{\max(N,T)})$.
\begin{itemize}
\item[(i)] The $e_{it}$ are independent across $i$ and $t$, conditional on ${\cal C}$, and satisfy $\mathbb{E}(e_{it} | {\cal C}) = 0$, and $\mathbb{E}(e_{it}^4 | {\cal C})$ is bounded by a non-random constant, uniformly over $i,t$ and $N,T$. Here ${\cal C}$ can be any conditioning sigma-field, including the empty one (corresponding to unconditional expectations).
\item[(ii)] The $e_{it}$ follow different ${\rm MA}(\infty)$ processes for each $i$, namely
\begin{align}
e_{it} &= \sum_{\tau=0}^\infty \, \psi_{i\tau} \, u_{i,t-\tau} \; , \qquad \text{for } i=1\ldots N, \; t=1\ldots T \; , \label{errorMA}
\end{align}
where the $u_{it}$, $i=1\ldots N$, $t=-\infty \ldots T$ are independent random variables with $\mathbb{E} u_{it} =0$ and $\mathbb{E} u_{it}^4$ uniformly bounded across $i,t$ and $N,T$. The coefficients $\psi_{i\tau}$ satisfy
\begin{align}
\sum_{\tau=0}^\infty \, \tau \, \max_{i=1\ldots N} \, \psi_{i\tau}^2 \, &< \, B \; , & \sum_{\tau=0}^\infty \, \max_{i=1\ldots N} \left| \psi_{i\tau} \right| \, &< \, B \; , \label{MArestrictionsPSI}
\end{align}
for a finite constant $B$ which is independent of $N$ and $T$.
\item[(iii)] The error matrix $e$ is generated as $e=\sigma^{1/2} \, u \, \Sigma^{1/2}$, where $u$ is an $N \times T$ matrix with independently distributed entries $u_{it}$ and $\mathbb{E} u_{it}=0$, $\mathbb{E} u_{it}^2=1$, and $\mathbb{E} u_{it}^4$ is bounded uniformly across $i,t$ and $N,T$. Here $\sigma$ is the $N\times N$ cross-sectional covariance matrix, and $\Sigma$ is the $T\times T$ time-serial covariance matrix, and they satisfy
\begin{align}
\max_{j=1\ldots N} \, \sum_{i=1}^N \, \left| \sigma_{ij} \right| \, &< \, B \; , & \max_{\tau=1\ldots T} \, \sum_{t=1}^T \, \left| \Sigma_{t\tau} \right| \, &< \, B \; ,
\end{align}
for some finite constant $B$ which is independent of $N$ and $T$. In this example we have $\mathbb{E} e_{it} e_{j\tau} = \sigma_{ij} \Sigma_{t\tau}$.
\end{itemize}
\end{lemma}
\begin{proof}[\bf Proof of Lemma~\ref{lemma:Enorm}, Example (i)]
Latala \cite*{Latala2006} showed that for an $N\times T$ matrix $e$ with independent entries, conditional on ${\cal C}$, we have
\begin{align*}
\mathbb{E}\left(\| e \| \, \big| {\cal C} \right) \, \leq \, c \left\{ \max_i \left[ \sum_t \mathbb{E}\left( e_{it}^2 \, \big| {\cal C} \right) \right]^{1/2} +\max_t \left[ \sum_i \mathbb{E} \left( e_{it}^2 \, \big| {\cal C} \right) \right]^{1/2} + \left[ \sum_{i,t} \mathbb{E} \left( e_{it}^4 \, \big| {\cal C} \right) \right]^{1/4} \right\} \; ,
\end{align*}
where $c$ is some universal constant. Because we assumed uniformly bounded $4$th conditional moments for $e_{it}$ we thus have $\|e\| = {\cal O}_p(\sqrt{T})+{\cal O}_p(\sqrt{N})+{\cal O}_p((TN)^{1/4}) = {\cal O}_p(\sqrt{\max(N,T)})$.
\end{proof}
\begin{proof}[\bf Example (ii)]
Let $\psi_j = (\psi_{1j}, \ldots , \psi_{Nj})'$ be an $N \times 1$ vector for each $j \geq 0$. Let $U_{-j}$ be an $N\times T$ sub-matrix of $(u_{it})$ consisting of $u_{it}$, $i=1\ldots N$, $t=1-j,\ldots,T-j$. We can then write equation \eqref{errorMA} in matrix notation as
\begin{align*}
e &= \sum_{j=0}^\infty \, \limfunc{diag}(\psi_j) \, U_{-j} \nonumber \\
&= \sum_{j=0}^T \, \limfunc{diag}(\psi_j) \, U_{-j} + r_{NT} ,
\end{align*}
where we cut the sum at $T$, which results in the remainder $r_{NT}= \sum_{j=T+1}^\infty \, \limfunc{diag}(\psi_j) \, U_{-j}$.
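Before bounding the remainder, here is a small simulation (not part of the proof) illustrating the conclusion of the lemma for such an ${\rm MA}(\infty)$ process; the geometrically decaying coefficients $\psi_{i\tau}=0.5^{\tau}$ are an arbitrary choice satisfying \eqref{MArestrictionsPSI}, and the infinite sum is approximated by a long finite one.
\begin{verbatim}
import numpy as np

# Simulate e_it = sum_tau psi_tau u_{i,t-tau} with psi_tau = 0.5^tau and
# report ||e|| / sqrt(max(N,T)); the ratio stays bounded as N,T grow.
rng = np.random.default_rng(2)
for N, T in [(50, 50), (200, 200), (800, 800)]:
    lags = 200                          # truncation of the MA(infinity)
    u = rng.normal(size=(N, T + lags))
    psi = 0.5 ** np.arange(lags)
    e = np.stack([np.convolve(u[i], psi)[lags:T + lags] for i in range(N)])
    print(N, T, np.linalg.norm(e, 2) / np.sqrt(max(N, T)))
\end{verbatim}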
When approximating an ${\rm MA}(\infty)$ by a finite ${\rm MA}(T)$ process we have for the remainder
\begin{align*}
\mathbb{E} \left\| r_{NT} \right\|_{F}^2 \, = \sum_{i=1}^N \, \sum_{t=1}^T \, \mathbb{E} \left( r_{NT} \right)_{it}^2 \, &\leq \, \sigma_u^2 \, \sum_{i=1}^N \, \sum_{t=1}^T \, \sum_{j=T+1}^\infty \, \psi_{ij}^2 \nonumber \\
&\leq \sigma_u^2 \, N \, T \, \sum_{j=T+1}^\infty \, \max_i\left( \psi_{ij}^2 \right) \nonumber \\
&\leq \sigma_u^2 \, N \, \sum_{j=T+1}^\infty \, j \, \max_i\left( \psi_{ij}^2 \right) \;,
\end{align*}
where $\sigma_u^2$ is a uniform upper bound on the variances of the $u_{it}$. Therefore, for $T \rightarrow \infty$ we have
\begin{align*}
\mathbb{E} \left( \frac{ \left\| r_{NT} \right\|_{F}^2 } N \right) \, \longrightarrow \, 0 \; ,
\end{align*}
which implies $\left\| r_{NT} \right\|_{F}^2 = o_p(N)$, and therefore $\| r_{NT} \| \leq \| r_{NT} \|_{F} = o_p(\sqrt{N})$. Let $V$ be the $N\times 2T$ matrix consisting of $u_{it}$, $i=1\ldots N$, $t=1-T,\ldots,T$. For $j=0\ldots T$ the matrices $U_{-j}$ are sub-matrices of $V$, and therefore $\| U_{-j} \| \leq \|V\|$. From example (i) we know $\| V \| = {\cal O}_p(\sqrt{\max(N,2T)})$. Furthermore, we know $\| \limfunc{diag}(\psi_j) \| \leq \max_i\left( \left| \psi_{ij} \right| \right)$. Combining these results we find
\begin{align*}
\left\| e \right\| &\leq \, \sum_{j=0}^T \, \| \limfunc{diag}(\psi_j) \| \, \|U_{-j}\| + \|r_{NT}\| \nonumber \\
&\leq \, \sum_{j=0}^T \, \max_i\left( \left| \psi_{ij} \right| \right) \|V\| + o_p(\sqrt{N}) \nonumber \\
&\leq \, \left[ \sum_{j=0}^\infty \, \max_i\left( \left| \psi_{ij} \right| \right) \right] {\cal O}_p(\sqrt{\max(N,2T)}) + o_p(\sqrt{N}) \nonumber \\
&= \, {\cal O}_p(\sqrt{\max(N,T)}),
\end{align*}
as required for the proof.
\end{proof}
\begin{proof}[\bf Example (iii)]
Because $\sigma$ and $\Sigma$ are positive definite, there exists a symmetric $N\times N$ matrix $\phi$ and a symmetric $T\times T$ matrix $\psi$ such that $\sigma=\phi^2$ and $\Sigma=\psi^2$. The error term can then be generated as $e= \phi u \psi$, where $u$ is an $N\times T$ matrix with independent entries $u_{it}$ such that $\mathbb{E}(u_{it})=0$, $\mathbb{E}(u_{it}^2)=1$, and $\mathbb{E}(u_{it}^4)$ is uniformly bounded. Given this definition of $e$ we immediately have $\mathbb{E} e_{it} = 0$ and $\mathbb{E} e_{it} e_{j\tau} = \sigma_{ij} \Sigma_{t\tau}$. What is left to show is $\| e \|={\cal O}_p(\sqrt{\max(N,T)})$. From example (i) we know $\| u \|={\cal O}_p(\sqrt{\max(N,T)})$. Using the inequality $\| \sigma \| \leq \sqrt{ \| \sigma \|_1 \, \| \sigma \|_\infty} = \| \sigma \|_1$, where $\| \sigma \|_1 = \| \sigma \|_\infty$ because $\sigma$ is symmetric, we find
\begin{align*}
\| \sigma \| \leq \| \sigma \|_1 \, \equiv \, \max_{j=1\ldots N} \, \sum_{i=1}^N \, \left| \sigma_{ij} \right| \, &< \, B \; ,
\end{align*}
and analogously $\| \Sigma \| < B$. Because $\| \sigma\| = \| \phi\|^2$ and $\|\Sigma\| = \| \psi \|^2$, we thus find $\|e \| \leq \| \phi \| \|u\| \|\psi \| \leq B \, {\cal O}_p(\sqrt{\max(N,T)})$, i.e., $\| e \|={\cal O}_p(\sqrt{\max(N,T)})$.
\end{proof}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Comments on Assumption \ref{ass:A4} on the Regressors}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Consistency of the LS estimator $\widehat \beta$ requires that the regressors not only satisfy the standard non-collinearity condition in assumption \ref{ass:A4}(i), but also the additional conditions on high- and low-rank regressors in assumption \ref{ass:A4}(ii). Bai \cite*{Bai2009} considers the special cases of only high-rank and only low-rank regressors. As low-rank regressors he considers only cross-sectionally invariant and time-invariant regressors, and he shows that if only these two types of regressors are present, one can show consistency under the assumption $\limfunc{plim}_{N,T\rightarrow \infty} W_{NT} > 0$ on the regressors (instead of assumption \ref{ass:A4}), where $W_{NT}$ is the $K\times K$ matrix defined by $W_{NT,k_1 k_2} = (NT)^{-1} \, {\rm Tr}(M_{f^0} \, X^{\prime}_{k_1} \, M_{\lambda^0} \, X_{k_2})$. This matrix appears as the approximate Hessian in the profile objective expansion in theorem \ref{th:ass_expand}, i.e., the condition $\limfunc{plim}_{N,T\rightarrow \infty} W_{NT} > 0$ is very natural in the context of interactive fixed effect models, and one may wonder whether one can replace assumption \ref{ass:A4} with this weaker condition in the general case as well and still obtain consistency of the LS estimator. Unfortunately, this is not the case, and below we present two simple counterexamples that show this.
\begin{itemize}
\item[(i)] Let there only be one factor ($R=1$) $f^0_t$ with corresponding factor loadings $\lambda^0_i$. Let there only be one regressor ($K=1$) of the form $X_{it}=w_i v_t + \lambda^0_i f^0_t$. Assume the $N\times 1$ vector $w=(w_1,\ldots,w_N)'$ and the $T\times 1$ vector $v=(v_1,\ldots,v_T)'$ are such that the $N\times 2$ matrix $\Lambda=(\lambda^0,w)$ and the $T\times 2$ matrix $F=(f^0,v)$ satisfy $\limfunc{plim}_{N,T \rightarrow \infty}\left(\Lambda^{\prime} \Lambda/N\right) > 0$, and $\limfunc{plim}_{N,T \rightarrow \infty} \left( F^{\prime} F / T \right) > 0$. In this case, we have $W_{NT}=(NT)^{-1} \, {\rm Tr}(M_{f^0} \, v w' \, M_{\lambda^0} \, w v')$, and therefore $\limfunc{plim}_{N,T\rightarrow \infty} W_{NT} =\limfunc{plim}_{N,T\rightarrow \infty} (NT)^{-1} \, {\rm Tr}(M_{f^0} \, v w' \, M_{\lambda^0} \, w v') > 0$. However, $\beta$ is not identified because $\beta^0 X + \lambda^0 f^{0\prime} = (\beta^0+1) X - w v'$, i.e., it is not possible to distinguish $(\beta,\lambda,f)=(\beta^0,\lambda^0,f^0)$ and $(\beta,\lambda,f)=(\beta^0+1,-w,v)$. This implies that the LS estimator is not consistent (both $\beta^0$ and $\beta^0+1$ could be the true parameter, but the LS estimator cannot be consistent for both).
\item[(ii)] Let there only be one factor ($R=1$) $f^0_t$ with corresponding factor loadings $\lambda^0_i$. Let the $N\times 1$ vectors $\lambda^0$, $w_1$ and $w_2$ be such that $\Lambda=(\lambda^0,w_1,w_2)$ satisfies $\limfunc{plim}_{N,T \rightarrow \infty}\left(\Lambda^{\prime} \Lambda/N\right) > 0$. Let the $T\times 1$ vectors $f^0$, $v_1$ and $v_2$ be such that $F=(f^0,v_1,v_2)$ satisfies $\limfunc{plim}_{N,T \rightarrow \infty} \left( F^{\prime} F / T \right) > 0$. Let there be four regressors ($K=4$) defined by $X_1=w_1 v_1'$, $X_2=w_2 v_2'$, $X_3=(w_1+\lambda^0)(v_2+f^0)'$, $X_4=(w_2+\lambda^0)(v_1+f^0)'$.
In this case, one can easily check that $\limfunc{plim}_{N,T\rightarrow \infty} W_{NT} > 0$. However, again $\beta$ is not identified, because $\sum_{k=1}^4 \beta^0_k X_k + \lambda^0 f^{0\prime} = \sum_{k=1}^4 (\beta^0_k+1) X_k - (\lambda^0+w_1+w_2) (f^{0}+v_1+v_2)'$, i.e., we cannot distinguish between the true parameters and $(\beta,\lambda,f)=(\beta^0+1,\,-\lambda^0-w_1-w_2,\,f^{0}+v_1+v_2)$. Again, as a consequence the LS estimator is not consistent in this case.
\end{itemize}
In example (ii), there are only low-rank regressors with ${\rm rank}(X_l)=1$. One can easily check that assumption \ref{ass:A4} is not satisfied for this example. In example (i) the regressor is a low-rank regressor with ${\rm rank}(X)=2$. In our present version of assumption \ref{ass:A4} we only consider low-rank regressors with ${\rm rank}(X)=1$, but (as already noted in a footnote in the main paper) it is straightforward to extend the assumption and the consistency proof to low-rank regressors with rank larger than one. Independent of whether we extend the assumption or not, the regressor $X$ of example (i) fails to satisfy assumption \ref{ass:A4}. This justifies our formulation of assumption \ref{ass:A4}, because it shows that, in general, the assumption cannot be replaced by the weaker condition $\limfunc{plim}_{N,T\rightarrow \infty} W_{NT} > 0$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Some Matrix Algebra (including Proof of Lemma~\ref{lemma:Optimization})}
\label{app:matrix}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
The following statements are true for real matrices (throughout the whole paper and the supplementary material we never use complex numbers). Let $A$ be an arbitrary $n\times m$ matrix. In addition to the operator (or spectral) norm $\|A\|$ and to the Frobenius (or Hilbert-Schmidt) norm $\|A\|_{F}$, it is also convenient to define the $1$-norm, the $\infty$-norm, and the $\max$-norm by
\begin{align*}
\| A \|_1 \, &= \, \max_{j=1\ldots m} \, \sum_{i=1}^n \, \left| A_{ij} \right| \; , & \| A \|_\infty \, &= \, \max_{i=1\ldots n} \, \sum_{j=1}^m \, \left| A_{ij} \right| \; , & \| A \|_{\max} \, &= \, \max_{i=1\ldots n} \, \max_{j=1 \ldots m} \, \left| A_{ij} \right| \; .
\end{align*}
\begin{lemma}[Some useful inequalities] \label{lemma:inequalities} Let $A$ be an $n\times m$ matrix, $B$ be an $m\times p$ matrix, and $C$ and $D$ be $n\times n$ matrices.
Then we have:
\begin{align*}
\text{(i)}& \qquad \left\| A\right\| \, \leq \, \left\| A\right\|_{F} \, \leq \, \left\| A\right\| \, \limfunc{rank}\left( A\right)^{1/2} \; , \nonumber \\
\text{(ii)}& \qquad \left\| AB \right\| \, \leq \, \left\| A\right\| \left\|B\right\| \; , \nonumber \\
\text{(iii)}& \qquad \left\| AB \right\|_{F} \, \leq \, \left\| A\right\|_{F} \left\|B\right\| \, \leq \, \left\| A\right\|_{F} \left\|B\right\|_{F} \; , \nonumber \\
\text{(iv)}& \qquad |{\rm Tr}(AB)| \, \leq \, \left\| A\right\|_{F} \left\|B\right\|_{F} \; , \qquad \text{for $n=p$,} \nonumber \\
\text{(v)}& \qquad \left| {\rm Tr}\left( C\right) \right| \leq \left\| C\right\| \limfunc{rank}\left( C\right) \; , \nonumber \\
\text{(vi)}& \qquad \left\| C\right\| \leq {\rm Tr}\left( C\right) \; , \qquad \text{for $C$ symmetric and $C\geq0$,} \nonumber \\
\text{(vii)}& \qquad \|A\|^2 \, \leq \, \|A\|_1 \, \|A\|_{\infty} \; , \nonumber \\
\text{(viii)}& \qquad \|A\|_{\max} \, \leq \, \|A\| \, \leq \, \sqrt{nm} \, \|A\|_{\max} \; , \nonumber \\
\text{(ix)}& \qquad \|A' C A \| \leq \|A' D A \| \; , \qquad \text{for $C$, $D$ symmetric and $0 \leq C\leq D$.} \nonumber \\
& \text{For $C$, $D$ symmetric, and $i=1,\ldots,n$ we have:} \nonumber \\
\text{(x)} & \qquad {\mu}_i(C) + {\mu}_n(D) \, \leq \, {\mu}_i(C+D) \, \leq \, {\mu}_i(C) + {\mu}_1(D) \; , \nonumber \\
\text{(xi)} & \qquad {\mu}_i(C) \leq \, {\mu}_i(C+D) \; , \qquad \text{for $D\geq0$,} \nonumber \\
\text{(xii)} & \qquad {\mu}_i(C) - \|D\| \, \leq \, {\mu}_i(C+D) \, \leq \, {\mu}_i(C) + \|D\| \; .
\end{align*}
\end{lemma}
\begin{proof}[\bf Proof]
Here we use the notation $s_i(A)$ for the $i$th largest singular value of a matrix $A$. \\
(i) We have $\|A\|=s_1(A)$, and $\|A\|_F^2=\sum_{i=1}^{{\rm rank}(A)} (s_i(A))^2$. The inequalities follow directly from this representation. (ii) This inequality is true for all unitarily invariant norms, see, e.g., Bhatia \cite*{Bhatia97}. (iii) can be shown as follows
\begin{align*}
\left\| AB \right\|_{F}^2 &= {\rm Tr}(ABB'A') \nonumber \\
&= {\rm Tr}[\|B\|^2 \, AA' - A(\|B\|^2\mathbb{I}-BB')A'] \nonumber \\
&\leq \|B\|^2 {\rm Tr}(AA') = \|B\|^2 \, \left\| A \right\|_{F}^2 \; ,
\end{align*}
where we used that the matrix $A(\|B\|^2\mathbb{I}-BB')A'$ is positive semi-definite. Relation (iv) is just the Cauchy--Schwarz inequality. To show (v) we decompose $C=UDO'$ (singular value decomposition), where $U$ and $O$ are $n\times {\rm rank}(C)$ matrices that satisfy $U'U=O'O=\mathbb{I}$ and $D$ is a ${\rm rank}(C) \times {\rm rank}(C)$ diagonal matrix with entries $s_i(C)$. We then have $\|O\|=\|U\|=1$ and $\|D\|=\|C\|$ and therefore
\begin{align*}
|{\rm Tr}(C)| &= |{\rm Tr}(UDO')| = |{\rm Tr}(DO'U)| \nonumber \\
&= \left|\sum_{i=1}^{{\rm rank}(C)} \, \eta_i' DO'U \eta_i \right| \nonumber \\
&\leq \sum_{i=1}^{{\rm rank}(C)} \|D\| \|O'\| \|U\| = {\rm rank}(C) \|C\| \; ,
\end{align*}
where $\eta_i$ denotes the $i$th standard basis vector of $\mathbb{R}^{{\rm rank}(C)}$. For (vi) let $e_1$ be a vector that satisfies $\|e_1\|=1$ and $\left\| C\right\| = e_1' C e_1$. Because $C$ is symmetric and positive semi-definite such an $e_1$ has to exist. Now choose $e_i$, $i=2,\ldots,n$, such that $e_i$, $i=1,\ldots,n$, becomes an orthonormal basis of the vector space of $n\times 1$ vectors. Because $C$ is positive semi-definite we then have ${\rm Tr}\left( C\right) = \sum_{i} e_i' C e_i \geq e_1' C e_1 = \|C\|$, which is what we wanted to show. For (vii) we refer to Golub and van Loan \cite*{golubvanloan1996}, p.~15. For (viii) note that $|A_{ij}| \leq \|A\|$ for all $i,j$, because $A_{ij}$ is a bilinear form of $A$ in two standard basis vectors; this gives the first inequality. The second inequality follows from part (i), because $\|A\| \leq \|A\|_{F} = \big( \sum_{i,j} A_{ij}^2 \big)^{1/2} \leq \sqrt{nm} \, \|A\|_{\max}$.
For inequality (ix) let $e_1$ be a vector that satisfies $\|e_1\|=1$ and $\left\| A'C A\right\| = e_1' A' C A e_1$. Because $A' C A$ is symmetric and positive semi-definite such an $e_1$ has to exist. Then we have $\left\| A'C A\right\| = e_1' A' D A e_1 - e_1' A' (D-C) A e_1 \leq e_1' A' D A e_1 \leq \|A'DA\|$. Statement (x) is a special case of Weyl's inequality, see, e.g., Bhatia \cite*{Bhatia97}. The inequalities (xi) and (xii) follow directly from (x), because ${\mu}_n(D)\geq 0$ for $D\geq0$, and because $-\|D\| \leq {\mu}_i(D)\leq \|D\|$ for $i=1,\ldots,n$.
\end{proof}
\begin{definition} \label{def:angle} Let $A$ be an $n\times r_1$ matrix and $B$ be an $n \times r_2$ matrix with ${\rm rank}(A)=r_1$ and ${\rm rank}(B)=r_2$. The smallest principal angle $\theta_{A,B} \in [0,\pi/2]$ between the linear subspaces ${\rm span}(A)=\{A a | \, a \in \mathbb{R}^{r_1} \}$ and ${\rm span}(B)=\{B b | \, b \in \mathbb{R}^{r_2} \}$ of $\mathbb{R}^n$ is defined by
\begin{align*}
\cos(\theta_{A,B}) &= \max_{0 \neq a \in \mathbb{R}^{r_1}} \max_{0\neq b \in \mathbb{R}^{r_2}} \frac{a' A' B b} {\|A a\| \|B b\|} \, .
\end{align*}
\end{definition}
\begin{lemma} \label{lemma:angle} Let $A$ be an $n\times r_1$ matrix and $B$ be an $n \times r_2$ matrix with ${\rm rank}(A)=r_1$ and ${\rm rank}(B)=r_2$. Then we have the following alternative characterizations of the smallest principal angle between ${\rm span}(A)$ and ${\rm span}(B)$
\begin{align*}
\sin(\theta_{A,B}) &= \min_{0 \neq a \in \mathbb{R}^{r_1}} \, \frac{\| M_B \, A \, a \|} {\|A \, a\|} \nonumber \\
&= \min_{0 \neq b \in \mathbb{R}^{r_2}} \, \frac{\| M_A \, B \, b \|} {\|B \, b\|} \; .
\end{align*}
\end{lemma}
\begin{proof}[\bf Proof]
Because $\| M_B \, A \, a \|^2 + \| P_B \, A \, a \|^2 = \|A\,a\|^2$ and $\sin(\theta_{A,B})^2 + \cos(\theta_{A,B})^2 = 1$, we find that proving the lemma is equivalent to proving
\begin{align*}
\cos(\theta_{A,B}) &= \max_{0 \neq a \in \mathbb{R}^{r_1}} \, \frac{\| P_B \, A \, a \|} {\|A \, a\|} = \max_{0 \neq b \in \mathbb{R}^{r_2}} \, \frac{\| P_A \, B \, b \|} {\|B \, b\|} \; .
\end{align*}
This last statement is Theorem 8 in Galantai and Hegedus \cite*{GalantaiHegedus2006}, and the proof can be found there.
\end{proof}
\begin{proof}[\bf Proof of Lemma~\ref{lemma:Optimization}]
Let
\begin{align*}
S_1(Z) &= \min_{f,\lambda} {\rm Tr}\left[ \left(Z-\lambda f'\right) \left(Z'-f \lambda'\right)\right] \; , \nonumber \\
S_2(Z) &= \min_f {\rm Tr}(Z \, M_f \, Z') \; , \nonumber \\
S_3(Z) &= \min_\lambda {\rm Tr}(Z' \, M_\lambda \, Z) \; , \nonumber \\
S_4(Z) &= \min_{\widetilde \lambda,\widetilde f} {\rm Tr}(M_{\widetilde \lambda} \, Z \, M_{\widetilde f} \, Z') \; , \nonumber \\
S_5(Z) &= \sum_{i=R+1}^{T} {\mu}_i(Z'Z) \; , \nonumber \\
S_6(Z) &= \sum_{i=R+1}^{N} {\mu}_i(ZZ') \; .
\end{align*}
The lemma claims
\begin{align*}
S_1(Z) \, &= \, S_2(Z) \, = \, S_3(Z) \, = \, S_4(Z) \, = \, S_5(Z) \, = \, S_6(Z) \; .
\end{align*}
We find:
\begin{itemize}
\item[(i)] The non-zero eigenvalues of $Z'Z$ and $ZZ'$ are identical, so in the sums in $S_5(Z)$ and in $S_6(Z)$ we are summing over identical values, which shows $S_5(Z)=S_6(Z)$.
\item[(ii)] Starting with $S_1(Z)$ and minimizing with respect to $f$ we obtain the first-order condition
\begin{align*}
\lambda^{\prime}\, Z &= \lambda^{\prime}\, \lambda \, f^{\prime} \; .
\end{align*}
Putting this into the objective function we can integrate out $f$, namely
\begin{align*}
{\rm Tr}\left[ \left( Z - \lambda f^{\prime }\right) ^{\prime }\left( Z - \lambda f^{\prime }\right) \right] &= {\rm Tr}\left( Z^{\prime} Z - Z^{\prime} \lambda f^{\prime}\right) \notag \\
&= {\rm Tr}\left( Z^{\prime} Z - Z^{\prime} \lambda (\lambda^{\prime}\lambda)^{-1} (\lambda^{\prime}\lambda) f^{\prime}\right) \notag \\
&= {\rm Tr}\left( Z^{\prime} Z - Z^{\prime} \lambda (\lambda^{\prime}\lambda)^{-1} \lambda^{\prime}\, Z \right) \notag \\
&= {\rm Tr}\left( Z^{\prime} \, M_\lambda \, Z \right) \; .
\end{align*}
This shows $S_1(Z)=S_3(Z)$. Analogously, we can integrate out $\lambda$ to obtain $S_1(Z)=S_2(Z)$.
\item[(iii)] Let $M_{\widehat \lambda}$ be the projector on the $N-R$ eigenspaces corresponding to the $N-R$ smallest eigenvalues\footnote{If an eigenvalue has multiplicity $m$, we count it $m$ times when finding the $N-R$ smallest eigenvalues. In this terminology we always have exactly $N$ eigenvalues of $ZZ^{\prime}$, but some may appear multiple times.} of $ZZ^{\prime}$, let $P_{\widehat \lambda} = \mathbb{I}_N - M_{\widehat \lambda}$, and let $\omega_{R}$ be the $R$'th largest eigenvalue of $ZZ^{\prime}$. We then know that the matrix $P_{\widehat \lambda} [Z Z^{\prime}-\omega_R \mathbb{I}_N ] P_{\widehat \lambda} -M_{\widehat \lambda} [Z Z^{\prime}-\omega_R \mathbb{I}_N ] M_{\widehat \lambda}$ is positive semi-definite. Thus, for an arbitrary $N\times R$ matrix $\lambda$ with corresponding projector $M_\lambda$ we have
\begin{align*}
0 &\leq {\rm Tr} \left\{ \left( P_{\widehat \lambda} [Z Z^{\prime}-\omega_R \mathbb{I}_N ] P_{\widehat \lambda} -M_{\widehat \lambda} [Z Z^{\prime}-\omega_R \mathbb{I}_N ] M_{\widehat \lambda} \right) \left( M_{\lambda} - M_{\widehat \lambda} \right)^2 \right\} \notag \\
&= {\rm Tr} \left\{ \left( P_{\widehat \lambda} [Z Z^{\prime}-\omega_R \mathbb{I}_N ] P_{\widehat \lambda} + M_{\widehat \lambda} [Z Z^{\prime}-\omega_R \mathbb{I}_N ] M_{\widehat \lambda} \right) \left( M_{\lambda} - M_{\widehat \lambda} \right) \right\} \notag \\
&= {\rm Tr} \left[ Z^{\prime} \, M_\lambda \, Z \right] - {\rm Tr} \left[ Z^{\prime} \, M_{\widehat \lambda} \, Z \right] - \omega_R \, \left[ \limfunc{rank}(M_\lambda) - \limfunc{rank}(M_{\widehat \lambda}) \right] \; ,
\end{align*}
and because $\limfunc{rank}(M_{\widehat \lambda}) = N-R$ and $\limfunc{rank}(M_\lambda) \geq N-R$ we have
\begin{align*}
{\rm Tr} \left[ Z^{\prime} \, M_{\widehat \lambda} \, Z \right] &\leq {\rm Tr} \left[ Z^{\prime} \, M_\lambda \, Z \right] \; .
\end{align*}
This shows $M_{\widehat \lambda}$ is the optimal choice in the minimization problem of $S_3(Z)$, i.e., the optimal $\lambda=\widehat \lambda$ is chosen such that the span of the $N$-dimensional vectors $\widehat \lambda_r$ ($r=1\ldots R$) equals the span of the $R$ eigenvectors that correspond to the $R$ largest eigenvalues of $ZZ^{\prime}$. This shows $S_3(Z)=S_6(Z)$. Analogously one can show $S_2(Z)=S_5(Z)$.
\item[(iv)] In the minimization problem in $S_4(Z)$ we can choose $\widetilde \lambda$ such that the span of the $N$-dimensional vectors $\widetilde \lambda_r$ ($r=1\ldots R_1$) is equal to the span of the $R_1$ eigenvectors that correspond to the $R_1$ largest eigenvalues of $ZZ^{\prime}$.
In addition, we can choose $\widetilde f$ such that the span of the $T$-dimensional vectors $\widetilde f_r$ ($r=1\ldots R_2$) is equal to the span of the $R_2$ eigenvectors that correspond to the $(R_1+1)$-largest up to the $R$-largest eigenvalue of $Z^{\prime}Z$. With this choice of $\widetilde \lambda$ and $\widetilde f$ we actually project out all the $R$ largest eigenvalues of $Z'Z$ and $ZZ'$. This shows that $S_4(Z) \leq S_5(Z)$. (This result is actually best understood by using the singular value decomposition of $Z$.) We can write $M_{\widetilde \lambda} \, Z \, M_{\widetilde f}=Z-\widetilde Z$, where
\begin{align*}
\widetilde Z &= P_{\widetilde \lambda} \, Z \, M_{\widetilde f} + Z \, P_{\widetilde f} \; .
\end{align*}
Because ${\rm rank}(\widetilde Z)\leq {\rm rank}(P_{\widetilde \lambda} \, Z \, M_{\widetilde f}) +{\rm rank}(Z \, P_{\widetilde f}) \leq R_1 + R_2 = R$, we can always write $\widetilde Z=\lambda f'$ for some appropriate $N\times R$ and $T\times R$ matrices $\lambda$ and $f$. This shows that
\begin{align*}
S_4(Z) &= \min_{\widetilde \lambda,\widetilde f} {\rm Tr}(M_{\widetilde \lambda} \, Z \, M_{\widetilde f} \, Z') \nonumber \\
&\geq \min_{\{\widetilde Z \;:\; {\rm rank}(\widetilde Z)\leq R\}} {\rm Tr}((Z-\widetilde Z)(Z-\widetilde Z)') \nonumber \\
&= \min_{f,\lambda} {\rm Tr}\left[ \left(Z-\lambda f'\right) \left(Z'-f \lambda'\right)\right] = S_1(Z) \; .
\end{align*}
Thus we have shown $S_1(Z) \leq S_4(Z) \leq S_5(Z)$, and these inequalities hold with equality because $S_1(Z)=S_5(Z)$ was already shown above.
\end{itemize}
\end{proof}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Supplement to the Consistency Proof (Appendix \ref{app:consistency})}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{lemma} \label{lemma:wv} Under assumptions \ref{ass:A1} and \ref{ass:A4} there exists a constant $B_0>0$ such that for the matrices $w$ and $v$ introduced in assumption \ref{ass:A4} we have
\begin{align*}
w' \, M_{\lambda^0} \, w \, - \, B_0 \, w' \, w &\geq 0 \; , \qquad \text{wpa1,} \nonumber \\
v' \, M_{f^0} \, v \, - \, B_0 \, v' \, v &\geq 0 \; , \qquad \text{wpa1.}
\end{align*}
\end{lemma}
\begin{proof}[\bf Proof]
We can decompose $w=\widetilde w \, \bar w$, where $\widetilde w$ is an $N \times {\rm rank}(w)$ matrix and $\bar w$ is a ${\rm rank}(w) \times K_1$ matrix. Note that $\widetilde w$ has full rank, and $M_w=M_{\widetilde w}$. By assumption \ref{ass:A1}(i) we know that $\lambda^{0\prime}\lambda^0/N$ has a probability limit, i.e., there exists some $B_1>0$ such that $\lambda^{0\prime}\lambda^0/N < B_1 \mathbb{I}_R$ wpa1.
Using this and assumption \ref{ass:A4} we find that, for any $R\times 1$ vector $a\neq0$,
\begin{align*}
\frac{\|M_{w} \, \lambda^0 \, a\|^2} {\|\lambda^0 \, a\|^2} \, = \, \frac{a' \, \lambda^{0\prime} \, M_{w} \, \lambda^0 \, a } {a' \, \lambda^{0\prime} \, \lambda^0 \, a} &> \frac{B}{B_1} \;, \qquad \text{wpa1.}
\end{align*}
Applying Lemma~\ref{lemma:angle} we find
\begin{align*}
\min_{0\neq b \in \mathbb{R}^{{\rm rank}(w)}} \, \frac{b' \, \widetilde w' \, M_{\lambda^0} \, \widetilde w \, b } {b' \, \widetilde w' \, \widetilde w \, b} \, = \, \min_{0\neq a \in \mathbb{R}^R} \, \frac{a' \, \lambda^{0\prime} \, M_{w} \, \lambda^0 \, a } {a' \, \lambda^{0\prime} \, \lambda^0 \, a} &> \frac{B}{B_1} \; , \qquad \text{wpa1.}
\end{align*}
Therefore, for every ${\rm rank}(w) \times 1$ vector $b$, we have $b' \left( \widetilde w' \, M_{\lambda^0} \, \widetilde w - (B/B_1) \widetilde w' \widetilde w \,\right) b > 0$, wpa1. Thus $\widetilde w' \, M_{\lambda^0} \, \widetilde w - (B/B_1) \, \widetilde w' \, \widetilde w > 0$, wpa1. Multiplying from the left with $\bar w'$ and from the right with $\bar w$ we obtain $w' \, M_{\lambda^0} \, w - (B/B_1) \, w' \, w \geq 0$, wpa1, i.e., the first claim of the lemma holds with $B_0=B/B_1$. Analogously we can show the statement for $v$.
\end{proof}
As a consequence of this lemma we obtain some properties of the low-rank regressors, summarized in the following lemma.
\begin{lemma} \label{lemma:lowrankprop} Let the assumptions \ref{ass:A1} and \ref{ass:A4} be satisfied and let $X_{{\rm low},\alpha}=\sum_{l=1}^{K_1} \alpha_l X_l$ be a linear combination of the low-rank regressors. Then there exists some constant $B>0$ such that
\begin{align*}
\min_{\{\alpha \in \mathbb{R}^{K_1}, \|\alpha\|=1\}} \frac{\left\|X_{{\rm low},\alpha} \, M_{f^0} \, X_{{\rm low},\alpha}'\right\|}{NT} &> B \; , \qquad \text{wpa1,} \nonumber \\
\min_{\{\alpha \in \mathbb{R}^{K_1}, \|\alpha\|=1\}} \frac{\left\|M_{\lambda^0} \, X_{{\rm low},\alpha} \, M_{f^0} \, X_{{\rm low},\alpha}' \, M_{\lambda^0} \right\|}{NT} &> B \; , \qquad \text{wpa1.}
\end{align*}
\end{lemma}
\begin{proof}[\bf Proof]
Note that $\left\|M_{\lambda^0} \, X_{{\rm low},\alpha} \, M_{f^0} \, X_{{\rm low},\alpha}' \, M_{\lambda^0} \right\| \leq \left\|X_{{\rm low},\alpha} \, M_{f^0} \, X_{{\rm low},\alpha}'\right\|$, because $\|M_{\lambda^0}\|=1$, i.e., if we can show the second inequality of the lemma we have also shown the first inequality. We can write $X_{{\rm low},\alpha} = w \, {\rm diag}(\alpha') \, v'$.
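As a numerical aside (not part of the proofs in this section), the equality of the two characterizations of $\sin(\theta_{A,B})$ in Lemma~\ref{lemma:angle}, which was used in the proof of Lemma~\ref{lemma:wv} above, can be checked directly; the following Python sketch is illustrative only, with arbitrary dimensions and seed.
\begin{verbatim}
import numpy as np

# sin(theta_{A,B}) = min_a ||M_B A a|| / ||A a||: the minimum is the
# square root of the smallest eigenvalue of the generalized eigenproblem
# (A' M_B A) a = mu (A' A) a; the lemma says the value is unchanged when
# the roles of A and B are swapped.
rng = np.random.default_rng(3)
n, r1, r2 = 10, 3, 4
A = rng.normal(size=(n, r1)); B = rng.normal(size=(n, r2))

def sin_angle(A, B):
    n = A.shape[0]
    M_B = np.eye(n) - B @ np.linalg.solve(B.T @ B, B.T)
    G = np.linalg.cholesky(A.T @ A)          # A'A = G G'
    H = np.linalg.solve(G, A.T @ M_B @ A @ np.linalg.inv(G).T)
    return np.sqrt(max(np.linalg.eigvalsh(H).min(), 0.0))

print(sin_angle(A, B), sin_angle(B, A))      # the two values agree
\end{verbatim}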
Using Lemma~\ref{lemma:wv} and parts (v), (vi) and (ix) of Lemma~\ref{lemma:inequalities} we find
\begin{align*}
\left\|M_{\lambda^0} \, X_{{\rm low},\alpha} \, M_{f^0} \, X_{{\rm low},\alpha}' \, M_{\lambda^0} \right\| &= \left\|M_{\lambda^0} \, w \, {\rm diag}(\alpha') \, v' \, M_{f^0} \, v \, {\rm diag}(\alpha') \, w' M_{\lambda^0} \right\| \nonumber \\
& \geq B_0 \, \left\|M_{\lambda^0} \, w \, {\rm diag}(\alpha') \, v' \, \, v \, {\rm diag}(\alpha') \, w' M_{\lambda^0} \right\| \nonumber \\
& \geq \frac{B_0}{K_1} \, {\rm Tr} \left[ M_{\lambda^0} \, w \, {\rm diag}(\alpha') \, v' \, \, v \, {\rm diag}(\alpha') \, w' M_{\lambda^0} \right] \nonumber \\
& = \frac{B_0}{K_1} \, {\rm Tr} \left[ v \, {\rm diag}(\alpha') \, w' M_{\lambda^0} w \, {\rm diag}(\alpha') \, v' \right] \nonumber \\
& \geq \frac{B_0}{K_1} \, \left\| v \, {\rm diag}(\alpha') \, w' M_{\lambda^0} w \, {\rm diag}(\alpha') \, v' \right\| \nonumber \\
& \geq \frac{B_0^2}{K_1} \, \left\| v \, {\rm diag}(\alpha') \, w' w \, {\rm diag}(\alpha') \, v' \right\| \nonumber \\
& \geq \frac{B_0^2}{K_1^2} \, {\rm Tr}\left[ v \, {\rm diag}(\alpha') \, w' w \, {\rm diag}(\alpha') \, v' \right] \nonumber \\
& = \frac{B_0^2}{K_1^2} {\rm Tr}\left[ X_{{\rm low},\alpha} X'_{{\rm low},\alpha} \right] \; .
\end{align*}
Thus we have $ \left\|M_{\lambda^0} \, X_{{\rm low},\alpha} \, M_{f^0} \, X_{{\rm low},\alpha}' \, M_{\lambda^0} \right\| /(NT) \geq (B_0/K_1)^2 \, \alpha' \, W^{\rm low}_{NT} \, \alpha$, where the $K_1 \times K_1$ matrix $W^{\rm low}_{NT}$ is defined by $W^{\rm low}_{NT,l_1 l_2} = (NT)^{-1} {\rm Tr}\left( X_{l_1} X'_{l_2} \right)$, i.e., it is a submatrix of $W_{NT}$. Because $W_{NT}$, and thus $W^{\rm low}_{NT}$, converges to a positive definite matrix, the lemma follows from the inequality above.
\end{proof}
Using the above lemmas we can now prove the lower bound on $\widetilde S^{(2)}_{NT}(\beta,f)$ that was used in the consistency proof. Remember that
\begin{align*}
\widetilde S^{(2)}_{NT}(\beta,f) &= \frac{1}{NT} \; {\rm Tr}\left[ \left( \lambda^0 \, f^{0\prime} + \sum_{k=1}^{K} (\beta^0_k-\beta_k) X_{k} \right) \, M_f \, \left( \lambda^0 \, f^{0\prime} + \sum_{k=1}^{K} (\beta^0_k-\beta_k) X_{k} \right)^{\prime}\, P_{(\lambda^0,w)} \right] \; .
\end{align*}
We want to show that, under the assumptions of theorem \ref{th:consistency}, there exist finite positive constants $a_0$, $a_1$, $a_2$, $a_3$ and $a_4$ such that
\begin{align*}
\widetilde S^{(2)}_{NT}(\beta,f) &\geq \, \frac{a_0 \left\| \beta^{\rm low} - \beta^{0,{\rm low}} \right\|^2 } { \left\| \beta^{\rm low} - \beta^{0,{\rm low}} \right\|^2 + a_1 \left\| \beta^{\rm low} - \beta^{0,{\rm low}} \right\| + a_2 } \nonumber \\
& \qquad \qquad - a_3 \left\| \beta^{\rm high} - \beta^{0, {\rm high}} \right\| - a_4 \left\| \beta^{\rm high} - \beta^{0, {\rm high}} \right\| \, \left\| \beta^{\rm low} - \beta^{0,{\rm low}} \right\| \; , \qquad \text{wpa1.}
\end{align*}
\begin{proof}[\bf Proof of the lower bound on $\widetilde S^{(2)}_{NT}(\beta,f)$.]
Applying Lemma~\ref{lemma:Optimization} and part (xi) of Lemma~\ref{lemma:inequalities} we find
\begin{align*}
\widetilde S^{(2)}_{NT}(\beta,f) &\geq \frac{1}{NT} \; {\mu}_{R+1} \left[ \left( \lambda^0 \, f^{0\prime} + \sum_{k=1}^{K} (\beta^0_k-\beta_k) X_{k} \right)^{\prime} \, P_{(\lambda^0,w)} \, \left( \lambda^0 \, f^{0\prime} + \sum_{k=1}^{K} (\beta^0_k-\beta_k) X_{k} \right) \right] \nonumber \\
&= \frac{1}{NT} \; {\mu}_{R+1} \Bigg[ \left( \lambda^0 \, f^{0\prime} + \sum_{l=1}^{K_1} (\beta^0_l-\beta_l) w_l \, v_l' \right)^{\prime} \left( \lambda^0 \, f^{0\prime} + \sum_{l=1}^{K_1} (\beta^0_l-\beta_l) w_l \, v_l' \right) \nonumber \\
&\qquad\qquad\qquad\qquad+ \left( \lambda^0 \, f^{0\prime} + \sum_{l=1}^{K_1} (\beta^0_l-\beta_l) w_l \, v_l' \right)^{\prime} P_{(\lambda^0,w)} \sum_{m=K_1+1}^{K} (\beta^0_m-\beta_m) X_m \nonumber \\
&\qquad\qquad\qquad\qquad+ \sum_{m=K_1+1}^{K} (\beta^0_m-\beta_m) X_m' P_{(\lambda^0,w)} \left( \lambda^0 \, f^{0\prime} + \sum_{l=1}^{K_1} (\beta^0_l-\beta_l) w_l \, v_l' \right) \nonumber \\
&\qquad\qquad\qquad\qquad+ \sum_{m=K_1+1}^{K} (\beta^0_m-\beta_m) X_m' P_{(\lambda^0,w)} \sum_{m=K_1+1}^{K} (\beta^0_m-\beta_m) X_m \Bigg] \nonumber \\
&\geq \frac{1}{NT} \; {\mu}_{R+1} \Bigg[ \left( \lambda^0 \, f^{0\prime} + \sum_{l=1}^{K_1} (\beta^0_l-\beta_l) w_l \, v_l' \right)^{\prime} \left( \lambda^0 \, f^{0\prime} + \sum_{l=1}^{K_1} (\beta^0_l-\beta_l) w_l \, v_l' \right) \nonumber \\
&\qquad\qquad\qquad\qquad+ \left( \lambda^0 \, f^{0\prime} + \sum_{l=1}^{K_1} (\beta^0_l-\beta_l) w_l \, v_l' \right)^{\prime} P_{(\lambda^0,w)} \sum_{m=K_1+1}^{K} (\beta^0_m-\beta_m) X_m \nonumber \\
&\qquad\qquad\qquad\qquad+ \sum_{m=K_1+1}^{K} (\beta^0_m-\beta_m) X_m' P_{(\lambda^0,w)} \left( \lambda^0 \, f^{0\prime} + \sum_{l=1}^{K_1} (\beta^0_l-\beta_l) w_l \, v_l' \right) \Bigg] \nonumber \\
&\geq \frac{1}{NT} \; {\mu}_{R+1} \left[ \left( \lambda^0 \, f^{0\prime} + \sum_{l=1}^{K_1} (\beta^0_l-\beta_l) w_l \, v_l' \right)^{\prime} \left( \lambda^0 \, f^{0\prime} + \sum_{l=1}^{K_1} (\beta^0_l-\beta_l) w_l \, v_l' \right) \right] \nonumber \\
&\quad - a_3 \left\| \beta^{\rm high} - \beta^{0, {\rm high}} \right\| - a_4 \left\| \beta^{\rm high} - \beta^{0, {\rm high}} \right\| \left\| \beta^{\rm low} - \beta^{0,{\rm low}} \right\| \; , \qquad \text{wpa1,}
\end{align*}
where $a_3>0$ and $a_4>0$ are appropriate constants. For the last step we used part (xii) of Lemma~\ref{lemma:inequalities} and the fact that
\begin{align*}
& \frac 1 {NT} \left\| \sum_{m=K_1+1}^{K} (\beta^0_m-\beta_m) X_m' P_{(\lambda^0,w)} \left( \lambda^0 \, f^{0\prime} + \sum_{l=1}^{K_1} (\beta^0_l-\beta_l) w_l \, v_l' \right) \right\| \nonumber \\
& \qquad \leq K \, \left\| \beta^{\rm high} - \beta^{0, {\rm high}} \right\| \max_m \left\| \frac {X_m}{\sqrt{NT}} \right\| \left( \left\| \frac {\lambda^0 \, f^{0\prime}}{\sqrt{NT}} \right\| + K \, \left\| \beta^{\rm low} - \beta^{0,{\rm low}} \right\| \, \max_l \left\| \frac {w_l v_l'}{\sqrt{NT}} \right\| \right) \; .
\end{align*}
Our assumptions guarantee that the operator norms of $\lambda^0 \, f^{0\prime}/{\sqrt{NT}}$ and ${X_m}/{\sqrt{NT}}$ are bounded from above as $N,T \rightarrow \infty$, which results in finite constants $a_3$ and $a_4$. We write the above result as $\widetilde S^{(2)}_{NT}(\beta,f) \geq {\mu}_{R+1}(A'A)/(NT) + \text{terms containing $\beta^{\rm high}$}$, where we defined $A=\lambda^0 \, f^{0\prime} + \sum_{l=1}^{K_1} (\beta^0_l-\beta_l) \, w_l \, v_l'$.
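As an aside, the eigenvalue perturbation bound in part (xii) of Lemma~\ref{lemma:inequalities}, which delivered the constants $a_3$ and $a_4$ in the last step, is easy to check numerically; the Python sketch below uses arbitrary sizes and an arbitrary symmetric perturbation, and is illustrative only.
\begin{verbatim}
import numpy as np

# Weyl-type bound (xii): each eigenvalue of C + D lies within ||D|| of
# the corresponding eigenvalue of C, for symmetric C and D.
rng = np.random.default_rng(4)
n = 15
C = rng.normal(size=(n, n)); C = C + C.T
D = 0.1 * rng.normal(size=(n, n)); D = D + D.T
mu_C = np.sort(np.linalg.eigvalsh(C))[::-1]      # descending mu_i(C)
mu_CD = np.sort(np.linalg.eigvalsh(C + D))[::-1]
print(np.all(np.abs(mu_CD - mu_C) <= np.linalg.norm(D, 2)))  # True
\end{verbatim}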
We also write $A = A_1 + A_2 + A_3$, with $A_1 = M_w \, A \, P_{f^0} = M_{w} \, \lambda^0 \, f^{0\prime}$, $A_2 = P_w \, A \, M_{f^0} = \sum_{l=1}^{K_1} (\beta^0_l-\beta_l) \, w_l \, v_l' \, M_{f^0}$, and $A_3 = P_w \, A \, P_{f^0} = P_w \, \lambda^0 \, f^{0\prime} + \sum_{l=1}^{K_1} (\beta^0_l-\beta_l) \, w_l \, v_l' \, P_{f^0}$. We then find $A'A=A_1' A_1 + (A'_2 + A_3') (A_2 + A_3)$ and
\begin{align*}
A' A \, &\geq \, A' A - (a^{1/2} A'_3 + a^{-1/2} A_2') (a^{1/2} A_3 + a^{-1/2} A_2) \nonumber \\
&= \left[ A'_1 A_1 - (a-1) \, A'_3 A_3 \right] \, + \, (1-a^{-1}) A'_2 A_2 \; ,
\end{align*}
where $\geq$ for matrices refers to the difference being positive semi-definite, and $a$ is a positive number. We choose $a = 1 + {\mu}_{R}(A'_1 A_1) / (2 \, \|A_3\|^2)$; the reason for this choice becomes clear below. Note that $\left[A'_1 A_1 - (a-1) \, A'_3 A_3 \right]$ has at most rank $R$ (asymptotically it has exactly rank $R$). The non-zero eigenvalues of the lower bound above are therefore given by the (at most) $R$ non-zero eigenvalues of $\left[ A'_1 A_1 - (a-1) \, A'_3 A_3 \right]$ and the non-zero eigenvalues of $(1-a^{-1}) A'_2 A_2$, the largest one of the latter being given by the operator norm $(1-a^{-1}) \|A_2 \|^2$. We therefore find
\begin{align*}
\frac{1}{NT} \; {\mu}_{R+1} \left( A'A \right) &\geq \frac{1}{NT} \; {\mu}_{R+1} \left[ \left( A'_1 A_1 - (a-1) \, A'_3 A_3 \right) \, + \, (1-a^{-1}) A'_2 A_2 \right] \nonumber \\
&\geq \frac{1}{NT} \, \min\left\{ (1-a^{-1}) \|A_2\|^2 \; , \; \; {\mu}_{R}\left[A'_1 A_1 - (a-1) \, A'_3 A_3 \right] \right\} \; .
\end{align*}
Using Lemma~\ref{lemma:inequalities}(xii) and our particular choice of $a$ we find
\begin{align*}
{\mu}_{R} \, \left[ A'_1 A_1 - (a-1) \, A'_3 A_3 \right] &\geq \, {\mu}_{R}(A'_1 A_1) - \left\| (a-1) A'_3 A_3 \right\| \nonumber \\
&= \, \frac{1}{2} \, {\mu}_{R}(A'_1 A_1) \; .
\end{align*}
Therefore
\begin{align*}
\frac 1 {NT} \, {\mu}_{R+1}(A'A) &\geq \frac{1}{2 \,NT} \, {\mu}_{R}(A'_1 A_1) \, \min\left\{ 1 \; , \; \; \frac{2 \, \|A_2\|^2} {2 \, \|A_3\|^2 + {\mu}_{R}(A'_1 A_1)} \right\} \nonumber \\
&\geq \frac{1}{NT} \, \frac{\|A_2\|^2 \, {\mu}_{R}(A'_1 A_1)} {2 \, \|A\|^2 + {\mu}_{R}(A'_1 A_1)} \; ,
\end{align*}
where we used $\|A\|\geq\|A_3\|$ and $\|A\|\geq\|A_2\|$. Our assumptions guarantee that there exist positive constants $c_0$, $c_1$, $c_2$, and $c_3$ such that
\begin{align*}
\frac {\|A\|} {\sqrt{NT}} &\leq \frac {\|\lambda^0 \, f^{0\prime}\|} {\sqrt{NT}} + \sum_{l=1}^{K_1} |\beta^0_l-\beta_l| \frac {\| w_l \, v_l' \|} {\sqrt{NT}} \leq c_0 + c_1 \left\| \beta^{\rm low} - \beta^{0,{\rm low}} \right\| \, , \quad \text{wpa1} \; , \nonumber \\
\frac {{\mu}_{R}(A'_1 A_1)} {NT} &= \frac{{\mu}_{R}\left( f^0 \, \lambda^{0\prime} \, M_{w} \, \lambda^0 \, f^{0\prime} \right)} {NT} \geq c_2 \, , \quad \text{wpa1} \; , \nonumber \\
\frac{\|A_2\|^2}{NT} &= \frac 1 {NT} \, {\mu}_{1} \left[ \sum_{l_1=1}^{K_1} (\beta^0_{l_1}-\beta_{l_1}) \, w_{l_1} \, v_{l_1}' \, M_{f^0} \, \sum_{l_2=1}^{K_1} (\beta^0_{l_2}-\beta_{l_2}) \, v_{l_2} \, w_{l_2}' \right] \nonumber \\
&\geq c_3 \left\| \beta^{\rm low} - \beta^{0,{\rm low}} \right\|^2 \, , \quad \text{wpa1} \; ,
\end{align*}
where for the last inequality we used Lemma~\ref{lemma:lowrankprop}. We thus have
\begin{align*}
\frac{1}{NT} \; {\mu}_{R+1} \left( A'A \right) &\geq \frac{c_3 \left\| \beta^{\rm low} - \beta^{0,{\rm low}} \right\|^2} {1 + \frac{2} {c_2} \left(c_0 + c_1 \left\| \beta^{\rm low} - \beta^{0,{\rm low}} \right\|\right)^2 } \, , \quad \text{wpa1} \; .
\end{align*}
Defining $a_0=\frac{c_2 c_3}{2 c_1^2}$, $a_1=\frac{2 c_0}{c_1}$ and $a_2=\frac{c_2 + 2 c_0^2}{2 c_1^2}$ we thus obtain
\begin{align*}
\frac{1}{NT} \; {\mu}_{R+1} \left( A'A \right) &\geq \frac{a_0 \left\| \beta^{\rm low} - \beta^{0,{\rm low}} \right\|^2 } { \left\| \beta^{\rm low} - \beta^{0,{\rm low}} \right\|^2 + a_1 \left\| \beta^{\rm low} - \beta^{0,{\rm low}} \right\| + a_2 } \, , \quad \text{wpa1} \; ,
\end{align*}
i.e., we have shown the desired bound on $\widetilde S^{(2)}_{NT}(\beta,f)$.
\end{proof}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Regarding the Proof of Corollary \ref{cor:limit}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
As discussed in the main text, the proof of Corollary \ref{cor:limit} is provided in Moon and Weidner~\cite*{MoonWeidner2015}. All that is left to show here is that the matrix $W_{NT}=W_{NT}(\lambda^0,\, f^0,\, X_{k})$ does not become singular as $N,T \rightarrow \infty$ under our assumptions.
\begin{proof}[\bf Proof]
Remember that
\begin{align*}
W_{NT,k_1 k_2} &= \frac 1 {NT} {\rm Tr}(M_{f^0} \, X^{\prime}_{k_1} \, M_{\lambda^0} \, X_{k_2}) \; .
\end{align*}
The smallest eigenvalue of the symmetric matrix $W_{NT}$ is given by
\begin{align*}
{\mu}_K \left( W_{NT} \right) &= \min_{\{a \in \mathbb{R}^K, \; a \neq 0\}} \frac{a' \, W_{NT} \, a} {\|a\|^2} \nonumber \\
&= \min_{\{a \in \mathbb{R}^K, \; a \neq 0\}} \frac 1 {NT \, \|a\|^2} {\rm Tr}\left[ M_{f^0} \, \left( \sum_{k_1=1}^K \, a_{k_1} \, X^{\prime}_{k_1} \right) \, M_{\lambda^0} \, \left(\sum_{k_2=1}^K \, a_{k_2} \, X_{k_2} \right) \right] \nonumber \\
&= \min_{\begin{minipage}{2.8cm}\begin{center}\scriptsize $\{\varphi \in \mathbb{R}^{K_1}, \; \alpha \in \mathbb{R}^{K_2}$\\ $\varphi \neq 0, \; \alpha\neq 0\}$\end{center}\end{minipage}} \frac { {\rm Tr}\left[ M_{f^0} \, \left( X'_{{\rm low},\varphi} + X'_{{\rm high},\alpha} \right) \, M_{\lambda^0} \, \left( X_{{\rm low},\varphi} + X_{{\rm high},\alpha} \right) \right] } {NT \, \left( \|\alpha\|^2 + \|\varphi\|^2 \right)} \; ,
\end{align*}
where we decomposed $a=(\varphi',\alpha')'$, with $\varphi$ and $\alpha$ being vectors of length $K_1$ and $K_2$, respectively, and we defined linear combinations of high- and low-rank regressors:
\begin{align*}
X_{{\rm low},\varphi} &= \sum_{l=1}^{K_1} \, \varphi_{l} \, X_{l} \; , & X_{{\rm high},\alpha} &= \sum_{m=K_1+1}^{K} \, \alpha_{m} \, X_{m} \; .
\end{align*}
Here, as in assumption \ref{ass:A4}, the components of $\alpha$ are denoted $\alpha_{K_1+1},\ldots,\alpha_{K}$ to simplify notation. We have $M_{\lambda^0} = M_{(\lambda^0,w)} + P_{(M_{\lambda^0} w)}$, where $w$ is the $N \times K_1$ matrix defined in assumption \ref{ass:A4}, i.e., $(\lambda^0,w)$ is an $N \times (R+K_1)$ matrix, whereas $M_{\lambda^0} w$ is also an $N \times K_1$ matrix.
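This decomposition of $M_{\lambda^0}$ is a purely algebraic fact and can be verified numerically; the following Python sketch (arbitrary dimensions and full-rank random draws) is illustrative only.
\begin{verbatim}
import numpy as np

# Check M_lam0 = M_(lam0, w) + P_(M_lam0 w): the orthogonal complement of
# span(lam0) splits into the complement of span(lam0, w) and the part of
# span(w) that remains after projecting out lam0.
rng = np.random.default_rng(5)
N, R, K1 = 12, 2, 3
lam0 = rng.normal(size=(N, R)); w = rng.normal(size=(N, K1))

def P(X):                          # projector onto the column span of X
    return X @ np.linalg.solve(X.T @ X, X.T)

M_lam0 = np.eye(N) - P(lam0)
M_joint = np.eye(N) - P(np.hstack([lam0, w]))
print(np.allclose(M_lam0, M_joint + P(M_lam0 @ w)))   # True
\end{verbatim}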
Using this we obtain
\begin{align}
& {\mu}_K \left( W_{NT} \right) \nonumber \\
& \quad = \min_{\begin{minipage}{2.8cm}\begin{center}\scriptsize $\{\varphi \in \mathbb{R}^{K_1}, \; \alpha \in \mathbb{R}^{K_2}$\\ $\varphi \neq 0, \; \alpha\neq 0\}$\end{center}\end{minipage}} \frac 1 {NT \, \left( \|\varphi\|^2 + \|\alpha\|^2 \right)} \bigg\{ {\rm Tr}\left[ M_{f^0} \, \left( X'_{{\rm low},\varphi} + X'_{{\rm high},\alpha} \right) \, M_{(\lambda^0,w)} \, \left( X_{{\rm low},\varphi} + X_{{\rm high},\alpha} \right) \right] \nonumber \\
& \qquad \qquad \qquad \qquad \qquad \qquad \qquad\qquad + {\rm Tr}\left[ M_{f^0} \, \left( X'_{{\rm low},\varphi} + X'_{{\rm high},\alpha} \right) \, P_{(M_{\lambda^0} w)} \, \left( X_{{\rm low},\varphi} + X_{{\rm high},\alpha} \right) \right] \bigg\} \nonumber \\
& \quad = \min_{\begin{minipage}{2.8cm}\begin{center}\scriptsize $\{\varphi \in \mathbb{R}^{K_1}, \; \alpha \in \mathbb{R}^{K_2}$\\ $\varphi \neq 0, \; \alpha\neq 0\}$\end{center}\end{minipage}} \frac 1 {NT \, \left( \|\varphi\|^2 + \|\alpha\|^2 \right)} \bigg\{ {\rm Tr}\left[ M_{f^0} \, X'_{{\rm high},\alpha} \, M_{(\lambda^0,w)} \, X_{{\rm high},\alpha} \right] \nonumber \\
& \qquad \qquad \qquad \qquad \qquad \qquad \qquad \qquad + {\rm Tr}\left[ M_{f^0} \, \left( X'_{{\rm low},\varphi} + X'_{{\rm high},\alpha} \right) \, P_{(M_{\lambda^0} w)} \, \left( X_{{\rm low},\varphi} + X_{{\rm high},\alpha} \right) \right] \bigg\} \, . \label{eq:boundEK1}
\end{align}
We note that there exist finite positive constants $c_1$, $c_2$, and $c_3$ such that
\begin{align}
\frac 1 {NT} {\rm Tr}\left[ M_{f^0} \, X'_{{\rm high},\alpha} \, M_{(\lambda^0,w)} \, X_{{\rm high},\alpha} \right] &\geq \, c_1 \| \alpha \|^2 \; , \quad \text{wpa1,} \nonumber \\
\frac 1 {NT} {\rm Tr}\left[ M_{f^0} \, \left( X'_{{\rm low},\varphi} + X'_{{\rm high},\alpha} \right) \, P_{(M_{\lambda^0} w)} \, \left( X_{{\rm low},\varphi} + X_{{\rm high},\alpha} \right) \right] &\geq 0 \; , \nonumber \\
\frac 1 {NT} {\rm Tr}\left[ M_{f^0} \, X'_{{\rm low},\varphi} \, P_{(M_{\lambda^0} w)} \, X_{{\rm low},\varphi} \right] &\geq \, c_2 \, \| \varphi \|^2 \; , \quad \text{wpa1,} \nonumber \\
\frac 1 {NT} {\rm Tr}\left[ M_{f^0} \, X'_{{\rm low},\varphi} \, P_{(M_{\lambda^0} w)} \, X_{{\rm high},\alpha} \right] &\geq - \frac {c_3} 2 \, \| \varphi \| \| \alpha \| \; , \quad \text{wpa1,} \nonumber \\
\frac 1 {NT} {\rm Tr}\left[ M_{f^0} \, X'_{{\rm high},\alpha} \, P_{(M_{\lambda^0} w)} \, X_{{\rm high},\alpha} \right] &\geq 0 \; , \label{inequ_highlow}
\end{align}
and we want to justify these inequalities now. The second and the last inequality in \eqref{inequ_highlow} hold because, e.g., ${\rm Tr}\left[ M_{f^0} \, X'_{{\rm high},\alpha} \, P_{(M_{\lambda^0} w)} \, X_{{\rm high},\alpha} \right] ={\rm Tr}\left[ M_{f^0} \, X'_{{\rm high},\alpha} \, P_{(M_{\lambda^0} w)} \, X_{{\rm high},\alpha} \, M_{f^0} \right]$, and the trace of a symmetric positive semi-definite matrix is non-negative. The first inequality in \eqref{inequ_highlow} is true because ${\rm rank}(f^0)+{\rm rank}(\lambda^0,w)=2R+K_1$ and, using Lemma~\ref{lemma:Optimization} and assumption \ref{ass:A4}, we have
\begin{align*}
\frac 1 {NT\|\alpha\|^2} {\rm Tr}\left[ M_{f^0} \, X'_{{\rm high},\alpha} \, M_{(\lambda^0,w)} \, X_{{\rm high},\alpha} \right] \geq \frac 1 {NT\|\alpha\|^2} {\mu}_{2R+K_1+1}\left[ X_{{\rm high},\alpha} \, X'_{{\rm high},\alpha} \right] &> b \; , \quad \text{wpa1},
\end{align*}
i.e., we can set $c_1=b$.
The fourth inequality in \eqref{inequ_highlow} is true because, according to Lemma~\ref{lemma:inequalities}(v), we have
\begin{align*}
\frac 1 {NT} {\rm Tr}\left[ M_{f^0} \, X'_{{\rm low},\varphi} \, P_{(M_{\lambda^0} w)} \, X_{{\rm high},\alpha} \right] &\geq - \, \frac {K_1} {NT} \, \left\| X_{{\rm low},\varphi} \right\| \left\| X_{{\rm high},\alpha} \right\| \nonumber \\
&\geq - \, \frac {K_1} {NT} \, \left\| X_{{\rm low},\varphi} \right\|_F \left\| X_{{\rm high},\alpha} \right\|_F \nonumber \\
&\geq - \, K_1 \, K_1 \, K_2 \, \|\varphi\| \, \|\alpha\| \, \, \max_{k_1=1\ldots K_1} \left\| \frac{X_{k_1}} {\sqrt{NT}} \right\|_F \, \max_{k_2=K_1+1\ldots K} \left\| \frac{X_{k_2}} {\sqrt{NT}} \right\|_F \nonumber \\
&\geq - \frac {c_3} 2 \, \|\varphi\| \, \|\alpha\| \; ,
\end{align*}
where we used that Assumption~\ref{ass:A4} implies that $\left\| X_{k} / \sqrt{NT} \right\|_F < C$ holds wpa1 for some constant $C$, and we set $c_3 = 2 \, K_1^2 \, K_2 \, C^2$. Finally, we have to argue that the third inequality in \eqref{inequ_highlow} holds. Note that $X'_{{\rm low},\varphi} \, P_{(M_{\lambda^0} w)} \, X_{{\rm low},\varphi} = X'_{{\rm low},\varphi} \, M_{\lambda^0} \, X_{{\rm low},\varphi}$, i.e., we need to show
\begin{align*}
\frac 1 {NT} {\rm Tr}\left[ M_{f^0} \, X'_{{\rm low},\varphi} \, M_{\lambda^0} \, X_{{\rm low},\varphi} \right] &\geq \, c_2 \, \| \varphi \|^2 \; .
\end{align*}
Using part (vi) of Lemma~\ref{lemma:inequalities} we find
\begin{align*}
\frac 1 {NT} {\rm Tr}\left[ M_{f^0} \, X'_{{\rm low},\varphi} \, M_{\lambda^0} \, X_{{\rm low},\varphi} \right] &= \frac 1 {NT} {\rm Tr}\left[ M_{\lambda^0} \, X_{{\rm low},\varphi} \, M_{f^0} \, X'_{{\rm low},\varphi} \, M_{\lambda^0} \right] \nonumber \\
&\geq \frac 1 {NT} \left\| M_{\lambda^0} \, X_{{\rm low},\varphi} \, M_{f^0} \, X'_{{\rm low},\varphi} \, M_{\lambda^0} \right\| \; ,
\end{align*}
and according to Lemma~\ref{lemma:lowrankprop} this expression is bounded from below by a positive constant times $\| \varphi \|^2$, wpa1 (in the lemma we have $\| \varphi \|=1$, but all expressions are homogeneous in $\|\varphi\|$). Using the inequalities \eqref{inequ_highlow} in equation \eqref{eq:boundEK1} we obtain
\begin{align*}
{\mu}_K \left( W_{NT} \right) &\geq \min_{\begin{minipage}{2.8cm}\begin{center}\scriptsize $\{\varphi \in \mathbb{R}^{K_1}, \; \alpha \in \mathbb{R}^{K_2}$\\ $\varphi \neq 0, \; \alpha\neq 0\}$\end{center}\end{minipage}} \frac 1 {\|\varphi\|^2 + \|\alpha\|^2} \left\{ c_1 \| \alpha \|^2 + \max\left[ 0, \, c_2 \| \varphi \|^2 - c_3 \|\varphi\| \|\alpha\| \right] \right\} \nonumber \\
& \geq \min\left( \frac {c_2} 2 , \, \frac{c_1 c_2^2} {c_2^2+c_3^2} \right) \, , \quad \text{wpa1}.
\end{align*}
Thus, the smallest eigenvalue of $W_{NT}$ is bounded from below by a positive constant as $N,T \rightarrow \infty$, i.e., $W_{NT}$ is non-degenerate and invertible.
\end{proof}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Proof of Examples for Assumption~\ref{ass:A5}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{proof}[\bf Proof of Example 1.]
We want to show that the conditions of Assumption~\ref{ass:A5} are satisfied. Conditions (i)-(iii) are satisfied by the assumptions of the example. For condition (iv), notice that ${\rm Cov} \left( X_{it}, X_{is} | \mathcal{C} \right) = \mathbb{E} \left( U_{it} U_{is} \right)$.
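The absolute summability required by condition (iv) is also easy to see numerically. In the sketch below (Python; the AR(1) coefficient and the unit innovation variance are illustrative assumptions) the normalized double sum of absolute autocovariances of $U_{it}=\sum_{p=0}^{\infty} (\beta^0)^p e_{it-p}$ remains bounded as $T$ grows:
\begin{verbatim}
# Illustration (not a proof): for U_t = sum_p beta^p e_{t-p} with iid
# unit-variance innovations, |E(U_t U_s)| = beta^{|t-s|} / (1 - beta^2),
# and (1/T) sum_{t,s} |E(U_t U_s)| stays bounded as T grows.
import numpy as np

beta = 0.8                      # illustrative value with |beta| < 1
for T in (50, 200, 800):
    t = np.arange(T)
    gap = np.abs(t[:, None] - t[None, :])
    total = np.sum(beta**gap) / (1.0 - beta**2)
    print(T, total / T)         # approaches a constant
\end{verbatim}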
Because $|\beta^0| < 1$ and $\sup_{it} \mathbb{E}(e_{it}^2) < \infty$, it follows \begin{eqnarray*} \frac{1}{NT} \sum_{i=1}^{N} \sum_{t,s=1}^T \left| {\rm Cov} \left( X_{it}, X_{is} | \mathcal{C} \right) \right| &=& \frac{1}{NT} \sum_{i=1}^{N} \sum_{t,s=1}^T \left| \mathbb{E} \left( U_{it} U_{is} \right) \right| \\ &=& \frac{1}{NT} \sum_{i=1}^{N} \sum_{t,s=1}^T \sum_{p,q = 0}^{\infty} \left| (\beta^0)^{p+q} \mathbb{E} \left( e_{it-p} e_{is-q} \right) \right| < \infty . \end{eqnarray*} For condition (v), notice by the independence between the sigma field $% \mathcal{C}$ and the error terms $\left\{ e_{it}\right\} $ that we have for some finite constant $M,$ \begin{eqnarray*} && \frac{1}{NT^{2}} \sum_{i=1}^{N} \sum_{t,s,u,v=1}^{T} \left\vert {\rm Cov}\left( e_{it}\widetilde{X}_{is},e_{iu}\widetilde{X}_{iv}|\mathcal{C}\right) \right\vert \\ &=& \frac{1}{NT^{2}}\sum_{i=1}^{N}\sum_{t,s,u,v=1}^{T}\left\vert {\rm Cov}\left( e_{it}U_{is},e_{iu}U_{iv}\right) \right\vert \\ &=& \frac{1}{NT^{2}}\sum_{i=1}^{N}\sum_{t,s,u,v=1}^{T}\sum_{p,q=0}^{\infty }\left\vert \left( \beta ^{0}\right) ^{p+q}\mathbb{E}\left( e_{it}e_{is-p}e_{iu}e_{iv-q}\right) -\left( \beta ^{0}\right) ^{p}\mathbb{E}% \left( e_{it}e_{is-p}\right) \left( \beta ^{0}\right) ^{q}\mathbb{E}\left( e_{iu}e_{iv-q}\right) \right\vert \\ &\leq & \frac{M}{T^{2}}\sum_{t,s,u,v=1}^{T}\sum_{p,q=0}^{\infty }\left\vert \beta ^{0}\right\vert ^{p+q}\left[ \mathbb{I}\left\{ t=u\right\} \mathbb{I}% \left\{ s-p=v-q\right\} +\mathbb{I}\left\{ t=v-q\right\} \mathbb{I}\left\{ s-p=u\right\} \right] \\ &=& \frac{M}{T^{2}}\sum_{t,u,s,v=1}^{T}\sum_{k=-\infty }^{s}\sum_{l=-\infty }^{v}\left\vert \beta ^{0}\right\vert ^{s-k+v-l}\mathbb{I}\left\{ t=u\right\} \mathbb{I}\left\{ k=l\right\} +M\left( \frac{1}{T}\sum _{\substack{ s,u=1 \\ s-u\geq 0}}^{T}\left\vert \beta ^{0}\right\vert ^{s-u}\right) \left( \frac{1}{T}\sum_{\substack{ v,t=1 \\ v-t\geq 0}}% ^{T}\left\vert \beta ^{0}\right\vert ^{v-t}\right) \\ &=& \frac{M}{T}\sum_{s,v=1}^{T}\sum_{k=-\infty }^{\min \left\{ s,v\right\} }\left\vert \beta ^{0}\right\vert ^{s+v-2k}+M\left( \frac{1}{T}\sum _{\substack{ s,u=1 \\ s-u\geq 0}}^{T}\left\vert \beta ^{0}\right\vert ^{s-u}\right) \left( \frac{1}{T}\sum_{\substack{ v,t=1 \\ v-t\geq 0}}% ^{T}\left\vert \beta ^{0}\right\vert ^{v-t}\right) . 
\end{eqnarray*}%
Notice
\begin{eqnarray*}
&&\frac{1}{T}\sum_{s,v=1}^{T}\sum_{k=-\infty }^{\min \left\{ s,v\right\} }\left\vert \beta ^{0}\right\vert ^{s+v-2k} \\
&=&\frac{2}{T}\sum_{s=2}^{T}\sum_{v=1}^{s}\sum_{k=-\infty }^{v}\left\vert \beta ^{0}\right\vert ^{s-v+2(v-k)}+\frac{2}{T}\sum_{s=1}^{T}\sum_{k=-\infty }^{s}\left\vert \beta ^{0}\right\vert ^{2(s-k)} \\
&=&\frac{2}{T}\sum_{s=2}^{T}\sum_{v=1}^{s}\left\vert \beta ^{0}\right\vert ^{s-v}\sum_{l=0}^{\infty }\left\vert \beta ^{0}\right\vert ^{2l}+\frac{2}{T}\sum_{s=1}^{T}\sum_{l=0}^{\infty }\left\vert \beta ^{0}\right\vert ^{2l} \\
&=&\frac{2}{1-\left\vert \beta ^{0}\right\vert ^{2}}\frac{1}{T}\sum_{s=2}^{T}\sum_{v=1}^{s}\left\vert \beta ^{0}\right\vert ^{s-v}+\frac{2}{1-\left\vert \beta ^{0}\right\vert ^{2}} \\
&=&\left( \frac{2}{1-\left\vert \beta ^{0}\right\vert ^{2}}\right) \sum_{l=1}^{T-1}\left\vert \beta ^{0}\right\vert ^{l}\left( 1-\frac{l}{T}\right) +\frac{2}{1-\left\vert \beta ^{0}\right\vert ^{2}} \\
&=&O\left( 1\right) ,
\end{eqnarray*}%
and
\begin{equation*}
\frac{1}{T}\sum_{\substack{ s,u=1 \\ s-u\geq 0}}^{T}\left\vert \beta ^{0}\right\vert ^{s-u}=\frac{1}{T}\sum_{s=1}^{T}\sum_{u=1}^{s}\left\vert \beta ^{0}\right\vert ^{s-u}=\sum_{l=0}^{T-1}\left\vert \beta ^{0}\right\vert ^{l}\left( 1-\frac{l}{T}\right) =O\left( 1\right) .
\end{equation*}%
Therefore, we have the desired result
\begin{equation*}
\frac{1}{NT^{2}}\sum_{i=1}^{N}\sum_{t,s,u,v=1}^{T}\left\vert {\rm Cov}\left( e_{it}\widetilde{X}_{is},e_{iu}\widetilde{X}_{iv}|\mathcal{C}\right) \right\vert ={\cal O}_{p}\left( 1\right) .
\end{equation*}
\end{proof}
\textsc{Preliminaries for Proof of Example 2}
\begin{itemize}
\item Although we observe $X_{it}$ for $1\leq t\leq T,$ here we treat $Z_{it}=\left( e_{it},X_{it}\right) $ as having an infinite past and future. Define
\begin{equation*}
\mathcal{G}_{\tau }^{t}\left( i\right) = {\cal C} \vee \sigma \left( \left\{ X_{is}:\tau \leq s\leq t\right\} \right) \text{ and }\mathcal{H}_{\tau }^{t}\left( i\right) = {\cal C} \vee \sigma \left( \left\{ Z_{is}:\tau \leq s\leq t\right\}\right) .
\end{equation*}%
Then, by definition, we have $\mathcal{G}_{\tau }^{t}\left( i\right) ,\mathcal{H}_{\tau }^{t}\left( i\right) \subset \mathcal{F}_{\tau }^{t}\left( i\right) $ for all $\tau ,t,i.$ By Assumption (iv) of Example 2, the time series $\left\{ X_{it}:-\infty < t < \infty \right\}$, and more generally $\left\{ Z_{it}:-\infty < t < \infty \right\}$, are, conditional on ${\cal C}$, strong mixing with mixing coefficients $\alpha _{m}\left( i\right)$.
\item Let $p,q>1$ with $1/p+1/q<1.$ Denote $\left\Vert X_{it}\right\Vert _{\mathcal{C},p}=\left( \mathbb{E}\left( \left\vert X_{it}\right\vert ^{p}|\mathcal{C}\right) \right) ^{1/p}. $ Then, for each $i,$ we have
\begin{equation}
\left\vert {\rm Cov}\left( X_{it},X_{it+m}|\mathcal{C}\right) \right\vert \leq 8\left\Vert X_{it}\right\Vert _{\mathcal{C},p}\left\Vert X_{it+m}\right\Vert _{\mathcal{C},q}\alpha _{m}^{1-\frac{1}{p}-\frac{1}{q}}\left( i\right) . \label{eq:mixing inequality}
\end{equation}
\end{itemize}
\begin{proof}[\bf Proof of Example 2.]
Again, we want to show that the conditions of Assumption~\ref{ass:A5} are satisfied. Conditions (i)-(iii) are satisfied by the assumptions of the example. For condition (iv), we apply the mixing inequality $\left( \ref{eq:mixing inequality}\right) $ with $p=q>4$.
Then, we have%
\begin{eqnarray*}
&&\frac{1}{NT}\sum_{i=1}^{N}\sum_{t,s=1}^{T}\left\vert {\rm Cov}\left( X_{it},X_{is}|\mathcal{C}\right) \right\vert \\
&\leq &\frac{2}{NT}\sum_{i=1}^{N}\sum_{t=1}^{T}\sum_{m=0}^{T-t}\left\vert {\rm Cov}\left( X_{it},X_{it+m}|\mathcal{C}\right) \right\vert =\frac{2}{NT}\sum_{i=1}^{N}\sum_{m=0}^{T-1}\sum_{t=1}^{T-m}\left\vert {\rm Cov}\left( X_{it},X_{it+m}|\mathcal{C}\right) \right\vert \\
&\leq &\frac{16}{NT}\sum_{i=1}^{N}\sum_{m=0}^{T-1}\sum_{t=1}^{T-m}\left\Vert X_{it}\right\Vert _{\mathcal{C},p}\left\Vert X_{it+m}\right\Vert _{\mathcal{C},p}\alpha _{m}\left( i\right) ^{\frac{p-2}{p}} \\
&\leq &16\left( \sup_{i,t}\left\Vert X_{it}\right\Vert _{\mathcal{C},p}^{2}\right) \sum_{m=0}^{\infty }\alpha _{m}^{\frac{p-2}{p}} \\
&=&{\cal O}_{p}\left( 1\right),
\end{eqnarray*}%
where the last line holds because $\sup_{i,t}\left\Vert X_{it}\right\Vert _{\mathcal{C},p}^{2}={\cal O}_{p}\left( 1\right) $ for some $p>4$ as assumed in Example 2, and $\sum_{m=0}^{\infty }\alpha _{m}^{\frac{p-2}{p}}= \sum_{m=0}^{\infty} m^{-\zeta\frac{p-2}{p}} = {\cal O}\left( 1\right)$ because of $\zeta > 3\frac{4p}{4p-1}$ and $p>4$. For condition (v), we need to show
\begin{equation*}
\frac{1}{NT^{2}}\sum_{i=1}^{N}\sum_{t,s,u,v=1}^{T}\left\vert {\rm Cov}\left( e_{it}\widetilde{X}_{is},e_{iu}\widetilde{X}_{iv}|\mathcal{C}\right) \right\vert ={\cal O}_{p}\left( 1\right) .
\end{equation*}%
Notice
\begin{eqnarray*}
&&\frac{1}{NT^{2}}\sum_{i=1}^{N}\sum_{t,s,u,v=1}^{T}\left\vert {\rm Cov}\left( e_{it}\widetilde{X}_{is},e_{iu}\widetilde{X}_{iv}|\mathcal{C}\right) \right\vert \\
&=&\frac{1}{NT^{2}}\sum_{i=1}^{N}\sum_{t,s,u,v=1}^{T}\left\vert \mathbb{E}\left( e_{it}\widetilde{X}_{is}e_{iu}\widetilde{X}_{iv}|\mathcal{C}\right) -\mathbb{E}\left( e_{it}\widetilde{X}_{is}|\mathcal{C}\right) \mathbb{E}\left( e_{iu}\widetilde{X}_{iv}|\mathcal{C}\right) \right\vert \\
&\leq &\frac{1}{NT^{2}}\sum_{i=1}^{N}\sum_{t,s,u,v=1}^{T}\left\vert \mathbb{E}\left( e_{it}\widetilde{X}_{is}e_{iu}\widetilde{X}_{iv}|\mathcal{C}\right) \right\vert +\frac{1}{N}\sum_{i=1}^{N}\left( \frac{1}{T}\sum_{t,s=1}^{T}\mathbb{E}\left( e_{it}\widetilde{X}_{is}|\mathcal{C}\right) \right) ^{2} \\
&=&I+II,\text{ say.}
\end{eqnarray*}%
First, for term $I,$ there are a finite number of different orderings among the indices $t,s,u,v.$ We consider the case $t\leq s\leq u\leq v$ and establish the desired result. The other cases can be shown analogously.
Note
\begin{eqnarray*}
&&\frac{1}{NT^{2}}\sum_{i=1}^{N}\sum_{t=1}^{T}\sum_{k=0}^{T-t}\sum_{l=0}^{T-k}\sum_{m=0}^{T-l}\left\vert \mathbb{E}\left( e_{it}\widetilde{X}_{it+k}e_{it+k+l}\widetilde{X}_{it+k+l+m}|\mathcal{C}\right) \right\vert \\
&\leq &\frac{1}{N}\sum_{i=1}^{N}\frac{1}{T^{2}}\sum_{t=1}^{T}\sum_{\substack{ 0\leq l,m\leq k \\ 0\leq k+l+m\leq T-t}}\left\vert \mathbb{E}\left( e_{it}\left( \widetilde{X}_{it+k}e_{it+k+l}\widetilde{X}_{it+k+l+m}\right) |\mathcal{C}\right) \right\vert \\
&&+\frac{1}{N}\sum_{i=1}^{N}\frac{1}{T^{2}}\sum_{t=1}^{T}\sum_{\substack{ 0\leq k,m\leq l \\ 0\leq k+l+m\leq T-t}}\bigg\vert \mathbb{E}\left[ \left( e_{it}\widetilde{X}_{it+k}\right) \left( e_{it+k+l}\widetilde{X}_{it+k+l+m}\right) |\mathcal{C}\right] \\
&& \qquad \qquad \qquad \qquad \qquad \qquad \qquad \qquad -\mathbb{E}\left( e_{it}\widetilde{X}_{it+k}|\mathcal{C}\right) \mathbb{E}\left( e_{it+k+l}\widetilde{X}_{it+k+l+m}|\mathcal{C}\right) \bigg\vert \\
&&+\frac{1}{N}\sum_{i=1}^{N}\frac{1}{T^{2}}\sum_{t=1}^{T}\sum_{\substack{ 0\leq k,m\leq l \\ 0\leq k+l+m\leq T-t}}\left\vert \mathbb{E}\left( e_{it}\widetilde{X}_{it+k}|\mathcal{C}\right) \mathbb{E}\left( e_{it+k+l}\widetilde{X}_{it+k+l+m}|\mathcal{C}\right) \right\vert \\
&&+\frac{1}{N}\sum_{i=1}^{N}\frac{1}{T^{2}}\sum_{t=1}^{T}\sum_{\substack{ 0\leq k,l\leq m \\ 0\leq k+l+m\leq T-t}}\left\vert \mathbb{E}\left[ \left( e_{it}\widetilde{X}_{it+k}e_{it+k+l}\right) \widetilde{X}_{it+k+l+m}|\mathcal{C}\right] \right\vert \\
&=&I_{1}+I_{2}+I_{3}+I_{4},\text{ say.}
\end{eqnarray*}%
By applying the mixing inequality $\left( \ref{eq:mixing inequality}\right) $ to $\left\vert \mathbb{E}\left( e_{it}\left( \widetilde{X}_{it+k}e_{it+k+l}\widetilde{X}_{it+k+l+m}\right) |\mathcal{C}\right) \right\vert $ with $e_{it}$ and $\widetilde{X}_{it+k}e_{it+k+l}\widetilde{X}_{it+k+l+m},$ we have%
\begin{eqnarray*}
&&\left\vert \mathbb{E}\left( e_{it}\left( \widetilde{X}_{it+k}e_{it+k+l}\widetilde{X}_{it+k+l+m}\right) |\mathcal{C}\right) \right\vert \\
&\leq &8\left\Vert e_{it}\right\Vert _{\mathcal{C},p}\left\Vert \widetilde{X}_{it+k}e_{it+k+l}\widetilde{X}_{it+k+l+m}\right\Vert _{\mathcal{C},q}\alpha _{k}^{1-\frac{1}{p}-\frac{1}{q}}\left( i\right) \\
&\leq &8\left\Vert e_{it}\right\Vert _{\mathcal{C},p}\left\Vert \widetilde{X}_{it+k}\right\Vert _{\mathcal{C},3q}\left\Vert e_{it+k+l}\right\Vert _{\mathcal{C},3q}\left\Vert \widetilde{X}_{it+k+l+m}\right\Vert _{\mathcal{C},3q}\alpha _{k}^{1-\frac{1}{p}-\frac{1}{q}}\left( i\right) ,
\end{eqnarray*}%
where the last inequality follows by the generalized H\"{o}lder inequality.
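The bounds for $I_1$ (and analogously for $I_2$, $I_3$, $I_4$) below rely on the convergence of the series $\sum_{k} k^2 \alpha_k^{1-\frac{1}{4p}}$. Under the polynomial decay $\alpha_m = m^{-\zeta}$ with $\zeta > 3\frac{4p}{4p-1}$ assumed in Example 2, this convergence is easy to check numerically; the following sketch (Python; the particular values of $p$ and $\zeta$ are illustrative assumptions) shows the partial sums settling down:
\begin{verbatim}
# Illustration (not a proof): partial sums of
#   sum_k k^2 * alpha_k^(1 - 1/(4p)),  alpha_k = k^(-zeta),
# settle down when zeta > 3*4p/(4p-1).
import numpy as np

p = 5.0                                        # illustrative, p > 4
zeta = 3.0 * 4.0 * p / (4.0 * p - 1.0) + 1.0   # above the threshold
k = np.arange(1.0, 1.0e6)
terms = k**2 * (k**(-zeta))**(1.0 - 1.0 / (4.0 * p))
partial = np.cumsum(terms)
print(partial[99], partial[9999], partial[-1])  # nearly identical
\end{verbatim}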
Choose $p=3q>4.$ Then,
\begin{eqnarray*}
I_{1} &\leq &\frac{8}{N}\sum_{i=1}^{N}\frac{1}{T^{2}}\sum_{t=1}^{T}\sum_{\substack{ 0\leq l,m\leq k \\ 0\leq k+l+m\leq T-t}}\left\Vert e_{it}\right\Vert _{\mathcal{C},p}\left\Vert \widetilde{X}_{it+k}\right\Vert _{\mathcal{C},p}\left\Vert e_{it+k+l}\right\Vert _{\mathcal{C},p}\left\Vert \widetilde{X}_{it+k+l+m}\right\Vert _{\mathcal{C},p}\alpha _{k}^{1-\frac{1}{4p}}\left( i\right) \\
&\leq &8\left( \sup_{i,t}\left\Vert e_{it}\right\Vert _{\mathcal{C},p}^{2}\right) \left( \sup_{i,t}\left\Vert \widetilde{X}_{it}\right\Vert _{\mathcal{C},p}^{2}\right) \frac{1}{T^{2}}\sum_{t=1}^{T}\sum_{\substack{ 0\leq l,m\leq k \\ 0\leq k+l+m\leq T-t}}\alpha _{k}^{1-\frac{1}{4p}} \\
&\leq &8\left( \sup_{i,t}\left\Vert e_{it}\right\Vert _{\mathcal{C},p}^{2}\right) \left( \sup_{i,t}\left\Vert \widetilde{X}_{it}\right\Vert _{\mathcal{C},p}^{2}\right) \sum_{k=0}^{\infty }k^{2}\alpha _{k}^{1-\frac{1}{4p}} \\
&=&{\cal O}_{p}\left( 1\right),
\end{eqnarray*}%
where the last line holds because we assume in Example 2 that $\left(\sup_{i,t}\left\Vert e_{it}\right\Vert _{\mathcal{C},p}^{2}\right) \left( \sup_{i,t}\left\Vert \widetilde{X}_{it}\right\Vert _{\mathcal{C},p}^{2}\right) ={\cal O}_{p}\left( 1\right) $ for some $p>4$, and $\sum_{m=0}^{\infty }m^{2}\alpha _{m}^{1-\frac{1}{4p}}= \sum_{m=0}^{\infty }m^{2-\zeta\frac{4p-1}{4p}}= O\left( 1\right)$ because of $\zeta > 3\frac{4p}{4p-1}$ and $p>4$. By applying similar arguments, we can also show
\begin{equation*}
I_{2},I_{3},I_{4}={\cal O}_{p}\left( 1\right) .
\end{equation*}
\end{proof}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Supplement to the Proof of Theorem \ref{th:limdis}}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\paragraph{\underline{Notation $\mathbb{E}_{\cal C}$ and ${\rm Var}_{\cal C}$ and ${\rm Cov}_{\cal C}$:}}
In the remainder of this supplementary file we write $\mathbb{E}_{\cal C}$, ${\rm Var}_{\cal C}$ and ${\rm Cov}_{\cal C}$ for the expectation, variance and covariance operators conditional on ${\cal C}$,~i.e., $\mathbb{E}_{\cal C}(A)=\mathbb{E}(A|{\cal C})$, ${\rm Var}_{\cal C}(A) = {\rm Var}(A|{\cal C})$ and ${\rm Cov}_{\cal C}(A,B) = {\rm Cov}(A,B |{\cal C})$.
\bigskip
What is left to show to complete the proof of Theorem \ref{th:limdis} is that Lemma~\ref{lemma:vanishing} and Lemma~\ref{lemma:denCLT} in the main text appendix hold. Before showing this, we first present two further intermediate lemmas.
\begin{lemma}
\label{lemma:normXweak}
Under the assumptions of Theorem~\ref{th:limdis} we have for $k=1,\ldots ,K$,
\begin{align*}
\qquad && (a) && \| P_{\lambda^0} \widetilde X_k \| &= o_p(\sqrt{NT}) \; , \nonumber \\
\qquad && (b) && \| \widetilde X_k P_{f^0}\| &= o_p(\sqrt{NT}) \; , \nonumber \\
\qquad && (c) && \|P_{\lambda^0} e X^{\prime}_k \| &= o_p(N^{3/2}) \, , \nonumber \\
\qquad && (d) && \|P_{\lambda^0} e P_{f^0} \| &= {\cal O}_p(1) \; .
&& \qquad
\end{align*}
\end{lemma}
\begin{proof}[\bf Proof of Lemma~\ref{lemma:normXweak}]
\# Part (a): We have
\begin{align*}
\|P_{\lambda^0} \widetilde X_k \| &= \|\lambda^0 (\lambda^{0\prime}\lambda^0)^{-1} \lambda^{0\prime} \widetilde X_k \| \nonumber \\
&\leq \|\lambda^0 (\lambda^{0\prime}\lambda^0)^{-1}\| \| \lambda^{0\prime} \widetilde X_k \| \nonumber \\
&\leq \|\lambda^0 \| \| (\lambda^{0\prime}\lambda^0)^{-1}\| \| \lambda^{0\prime} \widetilde X_k \|_F = {\cal O}_p(N^{-1/2}) \| \lambda^{0\prime} \widetilde X_k \|_F \; ,
\end{align*}
where we used parts (i) and (ii) of Lemma~\ref{lemma:inequalities} and Assumption~\ref{ass:A1}. We have
\begin{align*}
\mathbb{E}\left\{ \mathbb{E}_{\cal C}\left[ \| \lambda^{0\prime} \widetilde X_k \|_F^2 \right] \right\} &= \mathbb{E}\left\{ \sum_{r=1}^R \sum_{t=1}^T \mathbb{E}_{\cal C} \left[ \left( \sum_{i=1}^N \lambda^0_{ir} \widetilde X_{k,it} \right)^2 \right] \right\} \\
&= \mathbb{E}\left\{ \sum_{r=1}^R \sum_{t=1}^T \sum_{i=1}^N (\lambda^0_{ir})^2 \mathbb{E}_{\cal C} \left( \widetilde X_{k,it}^2 \right) \right\} \\
&= \sum_{r=1}^R \sum_{t=1}^T \sum_{i=1}^N \mathbb{E}\left[ (\lambda^0_{ir})^2 {\rm Var}_{\cal C} \left( X_{k,it} \right) \right] \\
&= {\cal O}( NT ) ,
\end{align*}
where we used that $\widetilde X_{k,it}$ is mean zero and independent across $i$, conditional on ${\cal C}$, together with our bounds on the moments of $\lambda^0_{ir}$ and $X_{k,it}$. We therefore have $ \| \lambda^{0\prime} \widetilde X_k \|_F = {\cal O}_p(\sqrt{NT})$ and the above inequality thus gives $\|P_{\lambda^0} \widetilde X_k \| = {\cal O}_p(\sqrt{T} ) = o_p(\sqrt{NT})$.
\# The proof for part (b) is similar. As above we first obtain $ \| \widetilde X_k P_{f^0}\| = \|P_{f^0} \widetilde X_k' \| \leq {\cal O}_p(T^{-1/2}) \| f^{0\prime} \widetilde X_k' \|_F$. Next, we have
\begin{align*}
\mathbb{E}_{\cal C}\left[ \| f^{0\prime} \widetilde X_k' \|_F^2 \right] &= \sum_{r=1}^R \sum_{i=1}^N \mathbb{E}_{\cal C} \left[ \left( \sum_{t=1}^T f^0_{tr} \widetilde X_{k,it} \right)^2 \right] \\
&= \sum_{r=1}^R \sum_{i=1}^N \sum_{t,s=1}^T f^0_{tr} f^0_{sr} \mathbb{E}_{\cal C} \left( \widetilde X_{k,it} \widetilde X_{k,is} \right) \\
&\leq \left[ \sum_{r=1}^R \left( \max_t | f^0_{tr} | \right)^2 \right] \sum_{i=1}^N \sum_{t,s=1}^T \left| {\rm Cov}_{\cal C} \left( X_{k,it} , X_{k,is} \right) \right| \\
&= {\cal O}_p(T^{2/(4+\epsilon)}) \, {\cal O}_p(NT) = o_p(N T^2) ,
\end{align*}
where we used that uniformly bounded $\mathbb{E} \| f^0_t \|^{4+\epsilon}$ implies $\max_t | f^0_{tr} | = {\cal O}_p( T^{1/(4+\epsilon)} )$. We thus have $ \| f^{0\prime} \widetilde X_k' \|_F = o_p(T \sqrt{N})$ and therefore $\| \widetilde X_k P_{f^0}\| = o_p(\sqrt{NT})$.
\# Next, we show part (c).
First, we have
\begin{align*}
\mathbb{E} \left\{ \mathbb{E}_{\cal C} \left[ \left( \|\lambda^{0\,\prime} e X'_k \|_F \right)^2 \right] \right\} &= \mathbb{E} \left\{ \mathbb{E}_{\cal C} \left[ \sum_{r=1}^R \sum_{j=1}^N \left( \sum_{i=1}^N \sum_{t=1}^T \lambda^{0}_{ir} e_{it} X_{k,jt} \right)^2 \right] \right\} \nonumber \\
&= \mathbb{E} \left\{ \sum_{r=1}^R \sum_{i,j,l=1}^N \sum_{t,s=1}^T \lambda^{0}_{ir} \lambda^{0}_{lr} \mathbb{E}_{\cal C} \left( e_{it} e_{ls} X_{k,jt} X_{k,js} \right) \right\} \nonumber \\
&= \sum_{r=1}^R \sum_{i,j=1}^N \sum_{t=1}^T \mathbb{E} \left[ (\lambda^{0}_{ir})^2 \mathbb{E}_{\cal C} \left( e_{it}^2 X_{k,jt}^2 \right) \right] = {\cal O}(N^2 T) \; ,
\end{align*}
where we used that $ \mathbb{E}_{\cal C} \left( e_{it} e_{ls} X_{k,jt} X_{k,js} \right)$ is only non-zero if $i=l$ (because of cross-sectional independence conditional on ${\cal C}$) and $t=s$ (because regressors are pre-determined). We can thus conclude $\|\lambda^{0\,\prime} e X'_k \|_F = {\cal O}_p(N \sqrt{T})$. Using this we find
\begin{align*}
\|P_{\lambda^0} e X'_k \| &= \|\lambda^0 (\lambda^{0\prime}\lambda^0)^{-1} \lambda^{0\prime} e X'_k \| \nonumber \\
&\leq \|\lambda^0 (\lambda^{0\prime}\lambda^0)^{-1}\| \| \lambda^{0\prime} e X'_k \| \nonumber \\
&\leq \|\lambda^0 \| \| (\lambda^{0\prime}\lambda^0)^{-1}\| \| \lambda^{0\prime} e X'_k \|_F = {\cal O}_p(N^{-1/2}) {\cal O}_p(N \sqrt{T}) = {\cal O}_p(\sqrt{NT}) \; .
\end{align*}
Since $N$ and $T$ grow at the same rate under our asymptotics, ${\cal O}_p(\sqrt{NT}) = o_p(N^{3/2})$, which is the statement of the lemma.
\# For part (d), we first find $\frac{1}{\sqrt{NT}}\left\Vert f^{0\prime}e\lambda^0 \right\Vert_{F}={\cal O}_{p}\left( 1\right)$, because
\begin{eqnarray*}
\mathbb{E} \left\{ \mathbb{E}_{\cal C} \left[ \left( \frac{\left\Vert f^{0\prime }e\lambda^0 \right\Vert _{F}}{\sqrt{NT}}\right) ^{2} \right] \right\} &=&\mathbb{E} \left\{ \frac{1}{NT}\mathbb{E}_{\cal C} \left[ \left( \sum_{i=1}^{N}\sum_{t=1}^{T}e_{it}f_{t}^{0\prime }\lambda^0_{i}\right) ^{2} \right] \right\} \nonumber \\
&=& \mathbb{E} \left\{ \frac{1}{NT}\sum_{i=1}^{N}\sum_{j=1}^{N}\sum_{t=1}^{T}\sum_{s=1}^{T}\mathbb{E}_{\cal C}\left( e_{it}e_{js} \right) f_{t}^{0\prime }\lambda_{i}^0\lambda_{j}^{0\prime}f^0_{s} \right\} \nonumber \\
&=&\frac{1}{NT}\sum_{i=1}^{N}\sum_{t=1}^{T} \mathbb{E}\left[ \mathbb{E}_{\cal C} \left( e_{it}^2 \right) f_{t}^{0\prime }\lambda^0_{i}\lambda_{i}^{0\prime }f^0_{t} \right] \nonumber \\
&=&{\cal O}\left( 1\right) ,
\end{eqnarray*}
where we used that $e_{it}$ is independent across $i$ and over $t$, conditional on ${\cal C}$. Thus we obtain
\begin{align*}
\|P_{\lambda^0} e P_{f^0} \| &= \| \lambda^0 (\lambda^{0\prime}\lambda^0)^{-1} \lambda^{0\prime} e f^0 (f^{0\prime}f^0)^{-1} \, f^{0\prime} \| \nonumber \\
&\leq \| \lambda^0 \| \left\| (\lambda^{0\prime}\lambda^0)^{-1} \right\| \| \lambda^{0\prime} e f^0 \| \left\| (f^{0\prime}f^0)^{-1} \right\| \| f^{0\prime} \| \nonumber \\
&\leq {\cal O}_p(N^{1/2}) {\cal O}_p(N^{-1}) \| \lambda^{0\prime} e f^0 \|_F {\cal O}_p(T^{-1}) {\cal O}_p(T^{1/2}) = {\cal O}_p(1) \;,
\end{align*}
where we used parts (i) and (ii) of Lemma~\ref{lemma:inequalities}.
\end{proof}
\begin{lemma}
\label{lemma:eeterms}
Suppose $A$ and $B$ are $T\times T$ and $N\times N$ matrices that are independent of $e$, conditional on ${\cal C}$, such that $\mathbb{E}_{\cal C}\left( \left\Vert A\right\Vert _{F}^{2} \right)={\cal O}_p\left( NT\right) $ and $\mathbb{E}_{\cal C} \left( \left\Vert B\right\Vert _{F}^{2} \right)={\cal O}_p\left( NT\right)$, and let Assumption~\ref{ass:A5} be satisfied.
Then there exists a finite non-random constant $c_0$ such that
\begin{align*}
(a) && \mathbb{E}_{\cal C}\left( \left\{ \limfunc{Tr}\left[ \left( e^{\prime }e-\mathbb{E}_{\cal C} \left( e^{\prime }e\right) \right) A\right] \right\}^2 \right) &\leq c_0 \, N \, \mathbb{E}_{\cal C} \left( \left\Vert A\right\Vert _{F}^{2} \right) \; , \nonumber \\
(b) && \mathbb{E}_{\cal C}\left( \left\{ \limfunc{Tr}\left[ \left( ee^{\prime }-\mathbb{E}_{\cal C} \left( ee^{\prime }\right) \right) B \right] \right\}^{2} \right) &\leq c_0 \, T \, \mathbb{E}_{\cal C} \left( \left\Vert B\right\Vert _{F}^{2} \right) \; .
\end{align*}
\end{lemma}
\begin{proof}[\bf Proof]
\# Part (a): Let $A_{ts}$ denote the $(t,s)$-th element of $A$. We have
\begin{align*}
\limfunc{Tr}\left\{ \left( e^{\prime }e-\mathbb{E}_{\cal C}\left( e^{\prime }e\right) \right) A\right\} &= \sum_{t=1}^{T}\sum_{s=1}^{T}\left( e^{\prime }e-\mathbb{E}_{\cal C}\left( e^{\prime }e\right) \right) _{ts}A_{ts} \nonumber \\
&= \sum_{t=1}^{T}\sum_{s=1}^{T}\left( \sum_{i=1}^{N}\left( e_{it}e_{is}-\mathbb{E}_{\cal C}\left( e_{it}e_{is}\right) \right) \right) A_{ts}.
\end{align*}%
Therefore,
\begin{align*}
&\mathbb{E}_{\cal C}\left[ \left( \limfunc{Tr}\left\{ \left( e^{\prime }e-\mathbb{E}_{\cal C}\left( e^{\prime }e\right) \right) A\right\} \right) ^{2}\right] \nonumber \\
&\qquad = \sum_{t=1}^{T}\sum_{s=1}^{T}\sum_{p=1}^{T}\sum_{q=1}^{T}\mathbb{E}_{\cal C}\left[ \left( \sum_{i=1}^{N}\left( e_{it}e_{is}-\mathbb{E}_{\cal C}\left( e_{it}e_{is}\right) \right) \right) \left( \sum_{j=1}^{N}\left( e_{jp}e_{jq}-\mathbb{E}_{\cal C}\left( e_{jp}e_{jq}\right) \right) \right) \right] \mathbb{E}_{\cal C} \left( A_{ts}A_{pq} \right).
\end{align*}%
Let $\Sigma_{it}=\mathbb{E}_{\cal C}(e_{it}^2)$. Then we find
\begin{align*}
&\mathbb{E}_{\cal C}\left\{ \left( \sum_{i=1}^{N}\left( e_{it}e_{is}-\mathbb{E}_{\cal C}\left( e_{it}e_{is}\right) \right) \right) \left( \sum_{j=1}^{N}\left( e_{jp}e_{jq}-\mathbb{E}_{\cal C}\left( e_{jp}e_{jq}\right) \right) \right) \right\} \nonumber \\
&\qquad\qquad\qquad\qquad =\sum_{i=1}^{N}\sum_{j=1}^{N}\left\{ \mathbb{E}_{\cal C}\left( e_{it}e_{is}e_{jp}e_{jq}\right) -\mathbb{E}_{\cal C}\left( e_{it}e_{is}\right) \mathbb{E}_{\cal C}\left( e_{jp}e_{jq}\right) \right\} \nonumber \\
&\qquad\qquad\qquad\qquad =\left\{
\begin{array}{l@{\quad}l}
\Sigma _{it}\Sigma _{is} & \text{ if }\left( t=p\right) \neq \left( s=q\right) \text{ and }\left( i=j\right) \\
\Sigma _{it}\Sigma _{is} & \text{ if }\left( t=q\right) \neq \left( s=p\right) \text{ and }\left( i=j\right) \\
\mathbb{E}_{\cal C}\left( e_{it}^{4}\right) -\Sigma _{it}^{2} & \text{ if }\left( t=s=p=q\right) \text{ and }\left( i=j\right) \\
0 & \text{ otherwise.}
\end{array}
\right.
\end{align*}%
Therefore,
\begin{align*}
&\mathbb{E}_{\cal C}\left[ \left( \limfunc{Tr}\left\{ \left( e^{\prime }e-\mathbb{E}_{\cal C}\left( e^{\prime }e\right) \right) A\right\} \right) ^{2}\right] \\
& \qquad \leq \sum_{t=1}^{T}\sum_{s=1}^{T}\sum_{i=1}^{N}\Sigma _{it}\Sigma _{is}\left( \mathbb{E}_{\cal C} \left( A_{ts}^{2}\right) +\mathbb{E}_{\cal C} \left( A_{ts}A_{st}\right) \right) +\sum_{t=1}^{T}\sum_{i=1}^{N}\left( \mathbb{E}_{\cal C}\left( e_{it}^{4}\right) -\Sigma _{it}^{2}\right) \mathbb{E}_{\cal C}\left( A_{tt}^{2}\right).
\end{align*}%
Define $\Sigma ^{i}={\rm diag}\left( \Sigma _{i1},...,\Sigma _{iT} \right) .$ Then, we have%
\begin{eqnarray}
\sum_{t=1}^{T}\sum_{s=1}^{T}\sum_{i=1}^{N}\Sigma _{it}\Sigma _{is}\, \mathbb{E}_{\cal C}\left( A_{ts}^{2}\right) &=&\mathbb{E}_{\cal C} \left( \sum_{i=1}^{N}\limfunc{Tr}\left( A^{\prime }\Sigma ^{i}A\Sigma ^{i}\right) \right) \nonumber \\
&\leq &\sum_{i=1}^{N}\mathbb{E}_{\cal C} \left\Vert A\Sigma ^{i}\right\Vert _{F}^{2}\leq \sum_{i=1}^{N}\left\Vert \Sigma ^{i}\right\Vert ^{2}\mathbb{E}_{\cal C} \left\Vert A\right\Vert _{F}^{2} \nonumber \\
&\leq &N\left( \sup_{it}\Sigma _{it}^{2}\right) \mathbb{E}_{\cal C} \left\Vert A\right\Vert _{F}^{2}.
\end{eqnarray}%
Also,%
\begin{eqnarray}
\sum_{t=1}^{T}\sum_{s=1}^{T}\sum_{i=1}^{N}\Sigma _{it}\Sigma _{is}\mathbb{E}_{\cal C} \left( A_{ts}A_{st}\right) &=&\mathbb{E}_{\cal C} \left[ \sum_{i=1}^{N}\limfunc{Tr}\left( \Sigma ^{i}AA\Sigma ^{i}\right) \right] \nonumber \\
&\leq &\sum_{i=1}^{N}\mathbb{E}_{\cal C} \left\Vert \Sigma ^{i}A\right\Vert _{F}\left\Vert A\Sigma ^{i}\right\Vert _{F}\leq \sum_{i=1}^{N}\left\Vert \Sigma ^{i}\right\Vert ^{2}\mathbb{E}_{\cal C} \left\Vert A\right\Vert _{F}^{2} \nonumber \\
&\leq &N\left( \sup_{it}\Sigma _{it}^{2}\right) \mathbb{E}_{\cal C} \left\Vert A\right\Vert _{F}^{2} \; .
\end{eqnarray}%
Finally,
\begin{eqnarray}
\sum_{t=1}^{T}\sum_{i=1}^{N}\left( \mathbb{E}_{\cal C}\left( e_{it}^{4}\right) -\Sigma _{it}^{2}\right) \mathbb{E}_{\cal C}\left( A_{tt}^{2}\right) &\leq &N\left( \sup_{it}\mathbb{E}_{\cal C}\left( e_{it}^{4}\right) \right) \mathbb{E}_{\cal C} \left\Vert A\right\Vert _{F}^{2},
\end{eqnarray}
and $\sup_{it}\mathbb{E}_{\cal C}\left( e_{it}^{4}\right)$ is assumed to be bounded, by Assumption~\ref{ass:A5}$(vi)$.
\# Part (b): The proof is analogous to the proof of part (a).
\end{proof}
\begin{proof}[\bf Proof of Lemma~\ref{lemma:vanishing}]
\# For part (a) we have
\begin{align*}
\left| \frac 1 {\sqrt{NT}} {\rm Tr} \left( P_{f^0} \, e^{\prime}\, P_{\lambda^0} \, \widetilde X_k \right) \right| &= \left| \frac 1 {\sqrt{NT}} {\rm Tr} \left( P_{f^0} \, e^{\prime}\, P_{\lambda^0} P_{\lambda^0} \widetilde X_k P_{f^0} \right) \right| \nonumber \\
& \leq \frac R {\sqrt{NT}} \left\| P_{\lambda^0} \, e \, P_{f^0} \right\| \left\| P_{\lambda^0} \widetilde X_k \right\| \left\| P_{f^0} \right\| \nonumber \\
&= \frac{1}{\sqrt{NT}} \; {\cal O}_p(1) \, o_p(\sqrt{NT}) \, {\cal O}_p(1) \nonumber \\
&= o_p(1),
\end{align*}
where the second-last equality follows by Lemma~\ref{lemma:normXweak} (a) and (d).
\# To show statement (b) we define $\zeta_{k,ijt} = e_{it} \widetilde X_{k,jt}$. We then have
\begin{align*}
\frac 1 {\sqrt{NT}} {\rm Tr} \left( P_{\lambda^0} \, e \,\widetilde X_k' \right) &= \sum_{r,q=1}^R \left[\left( \frac{\lambda^{0 \prime} \lambda^{0}} N \right)^{-1} \right]_{rq} \underbrace{ \frac 1 {N \sqrt{NT}} \sum_{t=1}^T \sum_{i,j=1}^N \lambda_{ir}^{0} \lambda_{jq}^{0} \zeta_{k,ijt} }_{ \equiv A_{k,rq} } .
\end{align*}
We have $ \mathbb{E}_{\cal C}\left( \zeta_{k,ijt} \zeta_{k,lm s} \right) \neq 0$ only if $t=s$ (because regressors are pre-determined) and $i=l$ and $j=m$ (because of cross-sectional independence).
Therefore
\begin{align*}
\mathbb{E}\left\{ \mathbb{E}_{\cal C}\left( A_{k,rq}^2 \right) \right\} &= \mathbb{E}\left\{ \frac 1 {N^3 T} \sum_{t,s=1}^T \sum_{i,j,l,m=1}^N \lambda^0_{ir} \lambda^0_{jq} \lambda^0_{lr} \lambda^0_{mq} \, \mathbb{E}_{\cal C}\left( \zeta_{k,ijt} \zeta_{k,lm s} \right) \right\} \\
&= \frac 1 {N^3 T} \sum_{t=1}^T \sum_{i,j=1}^N \mathbb{E}\left[ (\lambda^0_{ir})^2 (\lambda^0_{jq})^2 \, \mathbb{E}_{\cal C}\left( \zeta_{k,ijt}^2 \right) \right] = {\cal O}(1/N) = o(1).
\end{align*}
We thus have $A_{k,rq} = o_p(1)$ and therefore also $ \frac 1 {\sqrt{NT}} {\rm Tr} \left( P_{\lambda^0} \, e \,\widetilde X_k' \right) = o_p(1)$.
\# The proof for statement (c) is similar to the proof of statement (b). Define $\xi_{k,its} = e_{it} \widetilde X_{k,is} - \mathbb{E}_{\cal C}\left( e_{it} \widetilde X_{k,is} \right)$. We then have
\begin{align*}
\frac 1 {\sqrt{NT}} {\rm Tr} \left\{ P_{f^0} \, \left[ e^{\prime} \, \widetilde X_k - \mathbb{E}_{\cal C}\left( e^{\prime} \, \widetilde X_k \right) \right] \right\} &= \sum_{r,q=1}^R \left[\left( \frac{f^{0\prime} f^0} T \right)^{-1} \right]_{rq} \underbrace{ \frac 1 {T \sqrt{NT}} \sum_{i=1}^N \sum_{t,s=1}^T f^0_{tr} f^0_{sq} \xi_{k,its} }_{ \equiv B_{k,rq} } .
\end{align*}
Therefore
\begin{align*}
\mathbb{E}_{\cal C}\left( B_{k,rq}^2 \right) &= \frac 1 {T^3 N} \sum_{i,j=1}^N \sum_{t,s,u,v=1}^T f^0_{tr} f^0_{sq} f^0_{ur} f^0_{vq} \mathbb{E}_{\cal C}\left( \xi_{k,its} \xi_{k,juv} \right) \\
&\leq \left( \max_{t,\widetilde r} | f^0_{t \widetilde r} | \right)^4 \frac 1 {T^3 N} \sum_{i,j=1}^N \sum_{t,s,u,v=1}^T \left| {\rm Cov}_{\cal C}\left( e_{it} \widetilde X_{k,is} , e_{ju} \widetilde X_{k,jv} \right) \right| \\
&= \left( \max_{t,\widetilde r} | f^0_{t \widetilde r} | \right)^4 \frac 1 {T^3 N} \sum_{i=1}^N \sum_{t,s,u,v=1}^T \left| {\rm Cov}_{\cal C}\left( e_{it} \widetilde X_{k,is} , e_{iu} \widetilde X_{k,iv} \right) \right| \\
&= {\cal O}_p( T^{4/(4+\epsilon)} ) {\cal O}_p( 1/T ) \\
&= o_p(1),
\end{align*}
where we used that uniformly bounded $\mathbb{E} \| f^0_t \|^{4+\epsilon}$ implies $\max_t | f^0_{tr} | = {\cal O}_p( T^{1/(4+\epsilon)} )$.
\# Parts (d) and (e): We have $\|\lambda^0 \, (\lambda^{0\prime}\lambda^0)^{-1} \, (f^{0\prime}f^0)^{-1} \, f^{0\prime}\| = {\cal O}_p((NT)^{-1/2})$, $\|e\|={\cal O}_p(N^{1/2})$, $\|X_k\|={\cal O}_p(\sqrt{NT})$ and $\|P_{\lambda^0} e P_{f^0} \| = {\cal O}_p(1)$, where the last bound was shown in Lemma~\ref{lemma:normXweak}. Therefore:
\begin{align*}
&\frac 1 {\sqrt{NT}} {\rm Tr}\left(e P_{f^0} \, e' \, M_{\lambda^0} \, X_k \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \right) \nonumber \\
&\qquad \qquad = \frac 1 {\sqrt{NT}} {\rm Tr}\left(P_{\lambda^0} e P_{f^0} \, e' \, M_{\lambda^0} \, X_k \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \right) \nonumber \\
&\qquad \qquad \leq \frac R {\sqrt{NT}} \left\|P_{\lambda^0} e P_{f^0}\right\| \|e\| \|X_k\| \left\| f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \right\| = {\cal O}_p(N^{-1/2}) = o_p(1) \; ,
\end{align*}
which shows statement (d). The proof for part (e) is analogous.
\# To prove statement (f) we need to use in addition $ \| P_{\lambda^0} \, e \, X'_k \| = o_p(N^{3/2})$, which was also shown in Lemma~\ref{lemma:normXweak}.
We find
\begin{align*}
& \frac 1 {\sqrt{NT}} {\rm Tr}\left(e^{\prime}M_{\lambda^0} \, X_k \, M_{f^0} \, e^{\prime} \, \lambda^0 \, (\lambda^{0\prime}\lambda^0)^{-1} \, (f^{0\prime}f^0)^{-1} \, f^{0\prime} \right) \nonumber \\
& \qquad\qquad = \frac 1 {\sqrt{NT}} {\rm Tr}\left(e^{\prime}M_{\lambda^0} \, X_k \, e^{\prime} \, P_{\lambda^0} \, \lambda^0 \, (\lambda^{0\prime}\lambda^0)^{-1} \, (f^{0\prime}f^0)^{-1} \, f^{0\prime} \right) \nonumber \\
& \qquad\qquad \qquad - \frac 1 {\sqrt{NT}} {\rm Tr}\left(e^{\prime}M_{\lambda^0} \, X_k \, P_{f^0} \, e^{\prime}\, P_{\lambda^0} \, \lambda^0 \, (\lambda^{0\prime}\lambda^0)^{-1} \, (f^{0\prime}f^0)^{-1} \, f^{0\prime} \right) \nonumber \\
& \qquad\qquad \leq \frac R {\sqrt{NT}} \| e \| \| P_{\lambda^0} \, e \, X'_k \| \, \| \lambda^0 \, (\lambda^{0\prime}\lambda^0)^{-1} \, (f^{0\prime}f^0)^{-1} \, f^{0\prime} \| \nonumber \\
& \qquad\qquad \qquad + \frac R {\sqrt{NT}} \| e \| \| X_k \| \| P_{\lambda^0} \, e \, P_{f^0} \| \| \lambda^0 \, (\lambda^{0\prime}\lambda^0)^{-1} \, (f^{0\prime}f^0)^{-1} \, f^{0\prime} \| \nonumber \\
&\qquad\qquad = o_p(1) \; .
\end{align*}
\# Now we want to prove parts (g) and (h) of the present lemma. For part (g) we have
\begin{align*}
& \frac 1 {\sqrt{NT}} {\rm Tr}\left\{ \left[ e e' - \mathbb{E}_{\cal C} \left( e e' \right) \right] \, M_{\lambda^0} \, X_k \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \right\} \nonumber \\
&= \frac 1 {\sqrt{NT}} {\rm Tr}\left\{ \left[ e e' - \mathbb{E}_{\cal C} \left( e e' \right) \right] \, M_{\lambda^0} \, \overline X_k \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \right\} \\
& \qquad + \frac 1 {\sqrt{NT}} {\rm Tr}\left\{ \left[ e e' - \mathbb{E}_{\cal C} \left( e e' \right) \right] \, M_{\lambda^0} \, \widetilde X_k P_{f^0} \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \right\} \nonumber \\
&= \frac 1 {\sqrt{NT}} {\rm Tr}\left\{ \left[ e e' - \mathbb{E}_{\cal C} \left( e e' \right) \right] \, M_{\lambda^0} \, \overline X_k \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \right\} \\
& \qquad + \frac 1 {\sqrt{NT}} \left\| e e' - \mathbb{E}_{\cal C} \left( e e' \right) \right\| \left\| \widetilde X_k P_{f^0} \right\| \left\| f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \right\| \nonumber \\
&= \frac 1 {\sqrt{NT}} {\rm Tr}\left\{ \left[ e e' - \mathbb{E}_{\cal C} \left( e e' \right) \right] \, M_{\lambda^0} \, \overline X_k \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \right\} + o_p(1) .
\end{align*}
Thus, what is left to prove is $ \frac 1 {\sqrt{NT}} {\rm Tr}\left\{ \left[ e e' - \mathbb{E}_{\cal C} \left( e e' \right) \right] \, M_{\lambda^0} \, \overline X_k \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \right\} = o_p(1)$. For this we define
\begin{align*}
B_k &= M_{\lambda^0} \, \overline X_k \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \; .
\end{align*}
Using parts (i) and (ii) of Lemma~\ref{lemma:inequalities} we find
\begin{align*}
\| B_k \|_F &\leq R^{1/2} \|B_k\| \nonumber \\
&\leq R^{1/2} \| \overline X_k \| \left\| f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \right\| \nonumber \\
&\leq R^{1/2} \| \overline X_k \|_F \left\| f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \right\| \; ,
\end{align*}
and therefore
\begin{align*}
\mathbb{E}_{\cal C} \left( \| B_k \|_F^2 \right) &\leq R \left\| f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \right\|^2 \mathbb{E}_{\cal C} \left( \| \overline X_k \|_F^2 \right) \nonumber \\
&= {\cal O}_p(1) \; ,
\end{align*}
where we used $ \mathbb{E}_{\cal C}\left( \| \overline X_k \|_F^2 \right) = {\cal O}_p(NT)$, which is true because we assumed uniformly bounded moments of $\overline X_{k,it}$. Applying Lemma~\ref{lemma:eeterms} we therefore find
\begin{align*}
\mathbb{E}_{\cal C} \left[ \left( \frac 1 {\sqrt{NT}} {\rm Tr}\left\{ \left[ e e' - \mathbb{E}_{\cal C} \left( e e' \right) \right] B_k \right\} \right)^2 \right] &\leq c_0 \, \frac T {NT} \, \mathbb{E}_{\cal C} \left( \| B_k \|_F^2 \right) = o_p(1) \; ,
\end{align*}
and thus
\begin{align*}
\frac 1 {\sqrt{NT}} {\rm Tr}\left\{ \left[ e e' - \mathbb{E}_{\cal C} \left( e e' \right) \right] B_k \right\} &= o_p(1) \; ,
\end{align*}
which is what we wanted to show. The proof for part (h) is analogous.
\# Part (i): Conditional on ${\cal C}$ the expression $e_{it}^2 \mathfrak{X}_{it} \, \mathfrak{X}_{it}' - \mathbb{E}_{\cal C} \left( e_{it}^2 \, \mathfrak{X}_{it} \, \mathfrak{X}_{it}' \right)$ is mean zero, and it is also uncorrelated across $i$. This together with the bounded moments that we assume implies
\begin{align*}
{\rm Var}_{\cal C}\left\{ \frac 1 {NT} \, \sum_{i=1}^N \, \sum_{t=1}^T \left[ e_{it}^2 \, \mathfrak{X}_{it} \, \mathfrak{X}_{it}' - \mathbb{E}_{\cal C} \left( e_{it}^2 \, \mathfrak{X}_{it} \, \mathfrak{X}_{it}' \right) \right] \right\} &= {\cal O}_p(1/N) = o_p(1) ,
\end{align*}
which shows the required result.
\# Part (j): Define the $K \times K$ matrix $A = \frac 1 {NT} \, \sum_{i=1}^N \, \sum_{t=1}^T \, e_{it}^2 \left( \mathfrak{X}_{it} + {\cal X}_{it} \right) \left( \mathfrak{X}_{it} - {\cal X}_{it} \right)'$. Then we have
\begin{align*}
\frac 1 {NT} \, \sum_{i=1}^N \, \sum_{t=1}^T \, e_{it}^2 \left( \mathfrak{X}_{it} \, \mathfrak{X}_{it}' - {\cal X}_{it} \, {\cal X}_{it}' \right) &= \frac 1 2 \left( A + A' \right).
\end{align*}
Let $B_k$ be the $N \times T$ matrix with elements $B_{k,it} = e_{it}^2 \left( \mathfrak{X}_{k,it} + {\cal X}_{k,it} \right)$. We have $\| B_k \| \leq \|B_k\|_F = {\cal O}_p( \sqrt{NT} )$, because the moments of $B_{k,it}$ are uniformly bounded. The components of $A$ can be written as $A_{l k} = \frac 1 {NT} {\rm Tr}[ B_l ( \mathfrak{X}_{k} - {\cal X}_{k})' ]$. We therefore have
\begin{align*}
| A_{l k} | \leq \frac 1 {NT} {\rm rank}( \mathfrak{X}_{k} - {\cal X}_{k} ) \| B_l \| \left\| \mathfrak{X}_{k} - {\cal X}_{k} \right\| .
\end{align*}
We have $ \mathfrak{X}_{k} - {\cal X}_{k} = \widetilde X_k \, P_{f^0} + P_{\lambda^0} \, \widetilde X_k \, M_{f^0}$.
Therefore ${\rm rank}( \mathfrak{X}_{k} - {\cal X}_{k} ) \leq 2 R$ and
\begin{align*}
| A_{l k} | & \leq \frac{2 R} {NT} \| B_l \| \left( \left\| \widetilde X_k \, P_{f^0} \right\| + \left\| P_{\lambda^0} \, \widetilde X_k \, M_{f^0} \right\| \right) \\
&\leq \frac{2 R} {NT} \| B_l \| \left( \left\| \widetilde X_k \, P_{f^0} \right\| + \left\| P_{\lambda^0} \, \widetilde X_k \right\| \right) = \frac{2 R} {NT} {\cal O}_p( \sqrt{NT} ) o_p( \sqrt{NT} ) = o_p(1),
\end{align*}
where we used Lemma~\ref{lemma:normXweak}. This shows the desired result.
\end{proof}
\begin{proof}[\bf Proof of Lemma~\ref{lemma:denCLT}]
Let $c$ be a $K$-vector such that $\left\Vert c\right\Vert =1.$ The required result follows by the Cram\'{e}r--Wold device, if we show
\begin{align*}
\frac{1}{\sqrt{NT}}\sum_{i=1}^{N}\sum_{t=1}^{T}e_{it}\mathfrak{X}_{it}^{\prime }c \, \Rightarrow \, {\cal N}\left( 0,c^{\prime }\Omega c\right) \, .
\end{align*}
For this, define $\xi_{it} =e_{it}\mathfrak{X}_{it}^{\prime }c$. Furthermore define $\xi_m = \xi_{M,m} = \xi_{NT,it}$, with $M = NT$ and $m = T(i-1)+t \in \{1,\ldots,M\}$. We then have the following:
\begin{itemize}
\item[(i)] Under Assumption~\ref{ass:A5}$(i)$, $(ii)$, $(iii)$ the sequence $\{ \xi_m, \, m=1,\ldots,M \}$ is a martingale difference sequence under the filtration ${\cal F}_m = {\cal C} \vee \sigma(\{ \xi_{n}: n < m \})$.
\item[(ii)] $\mathbb{E}(\xi_{it}^4 )$ is uniformly bounded, because by Assumption~\ref{ass:A5}$(vi)$ $\mathbb{E}_{\cal C}\left( e_{it}^8 \right)$ and $\mathbb{E}_{\cal C}\left( \| X_{it} \|^{8+\epsilon} \right)$ are uniformly bounded by a non-random constant (applying Cauchy-Schwarz and the law of iterated expectations).
\item[(iii)] $\frac 1 {M} \sum_{m=1}^M \xi_{m}^2 = c' \Omega c + o_p(1)$. \\ This is true, because firstly under our assumptions we have $\mathbb{E}_{\cal C} \left\{ \left[ \frac 1 {M} \sum_{m=1}^M \left( \xi_{m}^2 - \mathbb{E}_{\cal C}( \xi_{m}^2) \right) \right]^2 \right\} = \mathbb{E}_{\cal C} \left\{ \frac 1 {M^2} \sum_{m=1}^M \left( \xi_{m}^2 - \mathbb{E}_{\cal C}( \xi_{m}^2) \right)^2 \right\} = {\cal O}_p(1/M) = o_p(1)$, implying that $\frac 1 {M} \sum_{m=1}^M \xi_{m}^2 = \frac 1 {M} \sum_{m=1}^M \mathbb{E}_{\cal C}( \xi_{m}^2 ) + o_p(1)$. We furthermore have $\frac 1 {M} \sum_{m=1}^M \mathbb{E}_{\cal C}( \xi_{m}^2 ) = {\rm Var}_{\cal C}( M^{-1/2} \sum_{m=1}^M \xi_{m} )$, and using the result in equation \eqref{VarEqOmega} of the main text we find ${\rm Var}_{\cal C}( M^{-1/2} \sum_{m=1}^M \xi_{m} ) ={\rm Var}_{\cal C}( (NT)^{-1/2} \sum_{i=1}^N \sum_{t=1}^T \xi_{it} ) = c' \Omega c + o_p(1)$.
\end{itemize}
These three properties of $\{ \xi_m, \, m=1,\ldots,M \}$ allow us to apply Corollary~5.26 in White~\cite*{White2001}, which is based on Theorem~2.3 in McLeish~\cite*{Mcleish1974}, to obtain $\frac 1 { \sqrt{M}} \sum_{m=1}^M \xi_{m} \to_d {\cal N}(0,c' \Omega c)$. This concludes the proof, because $ \frac 1 { \sqrt{M}} \sum_{m=1}^M \xi_{m} = \frac{1}{\sqrt{NT}}\sum_{i=1}^{N}\sum_{t=1}^{T}e_{it}\mathfrak{X}_{it}^{\prime }c$.
\end{proof}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Expansions of Projectors and Residuals}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
The incidental parameter estimators $\widehat f$ and $\widehat \lambda$ as well as the residuals $\widehat e$ enter into the asymptotic bias and variance estimators for the LS estimator $\widehat \beta$.
To describe the properties of $\widehat f$, $\widehat \lambda$ and $\widehat e$, it is convenient to have asymptotic expansions of the projectors $M_{\widehat \lambda}(\beta)$ and $M_{\widehat f}(\beta)$ that correspond to the minimizing parameters $\widehat \lambda(\beta)$ and $\widehat f(\beta)$ in equation \eqref{LNT123}. Note the minimizing $\widehat \lambda(\beta)$ and $\widehat f(\beta)$ can be defined for all values of $\beta$, not only for the optimal value $\beta=\widehat \beta$. The corresponding residuals are $\widehat e(\beta) = Y \, - \, \beta \cdot X \, - \, \widehat \lambda(\beta) \, \widehat f'(\beta)$. \begin{theorem} \label{theorem:expansions} Under Assumptions~\ref{ass:A1}, \ref{ass:A3}, and \ref{ass:A4}(i) we have the following expansions \begin{align*} M_{\widehat \lambda}(\beta) &= M_{\lambda^0} + M_{\widehat \lambda,e}^{(1)} + M_{\widehat \lambda,e}^{(2)} - \sum_{k=1}^K \left( \beta_k - \beta^0_k \right) M_{\widehat \lambda,k}^{(1)} + M_{\widehat \lambda}^{({\rm rem})}(\beta) \; , \nonumber \\ M_{\widehat f}(\beta) &= M_{f^0} + M_{\widehat f,e}^{(1)} + M_{\widehat f,e}^{(2)} - \sum_{k=1}^K \left( \beta_k - \beta^0_k \right) M_{\widehat f,k}^{(1)} + M_{\widehat f}^{({\rm rem})}(\beta) \; , \nonumber \\ \widehat e(\beta) &= M_{\lambda^0} \, e \, M_{f^0} + \widehat e^{(1)}_e - \sum_{k=1}^K \left( \beta_k - \beta^0_k \right) \widehat e^{(1)}_k + \widehat e^{({\rm rem})}(\beta) \; , \end{align*} where the spectral norms of the remainders satisfy for any series $\eta_{NT} \rightarrow 0$: \begin{align*} \sup_{\{\beta :\left\| \beta -\beta^{0} \right\| \leq \eta_{NT}\}} \frac{\left\| M_{\widehat \lambda}^{({\rm rem})}(\beta) \right\|} { \|\beta - \beta^0\|^2 + (NT)^{-1/2} \, \|e\| \, \|\beta - \beta^0\| \, \, + (NT)^{-3/2} \, \|e\|^3} &= {\cal O}_p\left(1\right) \, , \nonumber \\ \sup_{\{\beta :\left\| \beta -\beta^{0} \right\| \leq \eta_{NT}\}} \frac{\left\| M_{\widehat f}^{({\rm rem})}(\beta) \right\|} { \|\beta - \beta^0\|^2 + (NT)^{-1/2} \, \|e\| \, \|\beta - \beta^0\| \, \, + (NT)^{-3/2} \, \|e\|^3} &= {\cal O}_p\left(1\right) \, , \nonumber \\ \sup_{\{\beta :\left\| \beta -\beta^{0} \right\| \leq \eta_{NT}\}} \frac{ \left\| \widehat e^{({\rm rem})}(\beta) \right\| } { (NT)^{1/2} \|\beta - \beta^0\|^2 + \|e\| \, \|\beta - \beta^0\| + (NT)^{-1} \|e\|^3 } &= {\cal O}_p\left(1\right) \; , \end{align*} and we have ${\rm rank}(\widehat e^{({\rm rem})}(\beta)) \leq 7R$, and the expansion coefficients are given by \begin{align*} M^{(1)}_{\widehat \lambda,e} &= - \, M_{\lambda^0} \, e \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \lambda^{0\prime} \, - \, \lambda^0 \, (\lambda^{0\prime}\lambda^0)^{-1} \, (f^{0\prime}f^0)^{-1} \, f^{0\prime} \, e' \, M_{\lambda^0} \; , \nonumber \\ M^{(1)}_{\widehat \lambda,k} &= - \, M_{\lambda^0} \, X_k \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \lambda^{0\prime} \, - \, \lambda^0 \, (\lambda^{0\prime}\lambda^0)^{-1} \, (f^{0\prime}f^0)^{-1} \, f^{0\prime} \, X'_k \, M_{\lambda^0} \; , \nonumber \\ M^{(2)}_{\widehat \lambda,e} &= M_{\lambda^0} \, e \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \lambda^{0\prime} \, e \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \lambda^{0\prime} \nonumber \\ & \qquad +\lambda^0 \, (\lambda^{0\prime}\lambda^0)^{-1} \, (f^{0\prime}f^0)^{-1} \, f^{0\prime} \, e' \, \lambda^0 \, (\lambda^{0\prime}\lambda^0)^{-1} \, (f^{0\prime}f^0)^{-1} \, f^{0\prime} \, e' \, M_{\lambda^0} \nonumber \\ & \qquad - M_{\lambda^0} \, e \, 
M_{f^0} \, e' \, \lambda^0\,(\lambda^{0\prime}\lambda^0)^{-1}\,(f^{0\prime}f^0)^{-1}\,(\lambda^{0\prime}\lambda^0)^{-1}\,\lambda^{0\prime} \nonumber \\ & \qquad -\lambda^0\,(\lambda^{0\prime}\lambda^0)^{-1}\,(f^{0\prime}f^0)^{-1}\,(\lambda^{0\prime}\lambda^0)^{-1}\,\lambda^{0\prime} \, e \, M_{f^0} \, e' \, M_{\lambda^0} \nonumber \\ & \qquad - M_{\lambda^0} \, e \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, (f^{0\prime}f^0)^{-1} \, f^{0\prime} \, e' \, M_{\lambda^0} \nonumber \\ & \qquad + \lambda^0 \, (\lambda^{0\prime}\lambda^0)^{-1} \, (f^{0\prime}f^0)^{-1} \, f^{0\prime} \, e' \, M_{\lambda^0} \, e \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \lambda^{0\prime} \, , \end{align*} analogously \begin{align*} M^{(1)}_{\widehat f,e} &= \, - \, M_{f^0} \, e' \, \lambda^0 \, (\lambda^{0\prime}\lambda^0)^{-1} \, (f^{0\prime}f^0)^{-1} f^{0\prime} \, - \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \, e \, M_{f^0} \; , \nonumber \\ M^{(1)}_{\widehat f,k} &= \, - \, M_{f^0} \, X'_k \, \lambda^0 \, (\lambda^{0\prime}\lambda^0)^{-1} \, (f^{0\prime}f^0)^{-1} f^{0\prime} \, - \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \, X_k \,M_{f^0} \; , \nonumber \\ M^{(2)}_{\widehat f,e} &= M_{f^0} \, e' \, \lambda^0 \, (\lambda^{0\prime}\lambda^0)^{-1} \, (f^{0\prime}f^0)^{-1} f^{0\prime} \, e' \, \lambda^0 \, (\lambda^{0\prime}\lambda^0)^{-1} \, (f^{0\prime}f^0)^{-1} f^{0\prime} \nonumber \\ & \qquad +f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \, e \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \, e \, M_{f^0} \nonumber \\ & \qquad - M_{f^0} \, e' \, M_{\lambda^0} \, e \, f^0\,(f^{0\prime}f^0)^{-1}\,(\lambda^{0\prime}\lambda^0)^{-1}\,(f^{0\prime}f^0)^{-1}\,f^{0\prime} \nonumber \\ & \qquad -f^0\,(f^{0\prime}f^0)^{-1}\,(\lambda^{0\prime}\lambda^0)^{-1}\,(f^{0\prime}f^0)^{-1}\,f^{0\prime} \, e' \, M_{\lambda^0} \, e \, M_{f^0} \nonumber \\ & \qquad - M_{f^0} \, e' \, \lambda^0 \, (\lambda^{0\prime}\lambda^0)^{-1} \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \, e \, M_{f^0} \nonumber \\ & \qquad + f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \, e \, M_{f^0} \, e' \, \lambda^0 \, (\lambda^{0\prime}\lambda^0)^{-1} \, (f^{0\prime}f^0)^{-1} f^{0\prime} \, , \end{align*} and finally \begin{align*} \widehat e^{(1)}_k &= M_{\lambda^0} \, X_k \, M_{f^0} \; , \nonumber \\ \widehat e^{(1)}_e &= - M_{\lambda^0} \, e \, M_{f^0} \, e' \, \lambda^0\,(\lambda^{0\prime}\lambda^0)^{-1}\,(f^{0\prime}f^0)^{-1}\,f^{0\prime} \nonumber \\ & \qquad - \lambda^0\,(\lambda^{0\prime}\lambda^0)^{-1}\,(f^{0\prime}f^0)^{-1}\,f^{0\prime} \, e' \, M_{\lambda^0} \, e \, M_{f^0} \nonumber \\ & \qquad - M_{\lambda^0} \, e \, f^0 \, (f^{0\prime}f^0)^{-1} \, (\lambda^{0\prime}\lambda^0)^{-1} \, \lambda^{0\prime} \, e \, M_{f^0} \; . \end{align*} \end{theorem} \begin{proof}[\bf Proof] The general expansion of $M_{\widehat \lambda}(\beta)$ is given in Moon and Weidner \cite*{MoonWeidner2015}, and in the theorem we just make this expansion explicit up to a particular order. The result for $M_{\widehat f}(\beta)$ is just obtained by symmetry ($N \leftrightarrow T$, $\lambda \leftrightarrow f$, $e \leftrightarrow e'$, $X_k \leftrightarrow X_k'$). 
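Before turning to the residuals, we remark that the first-order part of these expansions is straightforward to check numerically. The sketch below (Python with numpy; the dimensions, the error scale $\sigma$, and the i.i.d.\ normal draws are illustrative assumptions, not part of the proof) computes $M_{\widehat\lambda}(\beta^0)$ by principal components and confirms that the remainder after subtracting $M_{\lambda^0}+M^{(1)}_{\widehat\lambda,e}$ shrinks at the quadratic rate in $\sigma$, in line with the order of the remainder term in the theorem:
\begin{verbatim}
# Numerical sketch (illustration only): at beta = beta^0 the projector
# M_{hat lambda} equals M_{lambda^0} + M^{(1)}_{hat lambda, e} up to a
# remainder that shrinks like sigma^2 when e is scaled by sigma.
import numpy as np

rng = np.random.default_rng(1)
N, T, R = 60, 60, 2
lam = rng.standard_normal((N, R))      # lambda^0
f = rng.standard_normal((T, R))        # f^0
e0 = rng.standard_normal((N, T))

def M(A):
    # M_A = I - A (A'A)^{-1} A'
    return np.eye(A.shape[0]) - A @ np.linalg.solve(A.T @ A, A.T)

for sigma in (0.1, 0.01):
    e = sigma * e0
    Y0 = lam @ f.T + e                 # Y - beta^0 . X
    w, v = np.linalg.eigh(Y0 @ Y0.T)   # hat lambda spans top-R space
    M_hat = np.eye(N) - v[:, -R:] @ v[:, -R:].T
    M1 = (-M(lam) @ e @ f @ np.linalg.inv(f.T @ f)
          @ np.linalg.inv(lam.T @ lam) @ lam.T)
    rem = np.linalg.norm(M_hat - M(lam) - (M1 + M1.T), 2)
    print(sigma, rem)                  # drops ~100x as sigma drops 10x
\end{verbatim}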
For the residuals $\widehat e$ we have
\begin{align*}
\widehat e &= M_{\widehat \lambda} \, \left( Y - \sum_{k=1}^{K} \, \widehat \beta_k \, X_k \right) = M_{\widehat \lambda} \, \left[ e - \left( \widehat \beta - \beta^0 \right) \cdot X + \lambda^0 f^{0\prime} \right] \; ,
\end{align*}
and plugging in the expansion of $M_{\widehat \lambda}$ gives the expansion of $\widehat e$. We have $\widehat e(\beta) = A_0 + \lambda^0 f^{0\prime} - \widehat \lambda(\beta) \widehat f'(\beta)$, where $A_0=e - \sum_k (\beta_k-\beta^0_k) X_k$. Therefore $\widehat e^{({\rm rem})}(\beta)=A_1+A_2+A_3$ with $A_1 = A_0 - M_{\lambda^0} \, A_0 \, M_{f^0}$, $A_2 =\lambda^0 f^{0\prime} - \widehat \lambda(\beta) \widehat f'(\beta)$, and $A_3 =-\widehat e^{(1)}_e$. We find ${\rm rank}(A_1)\leq 2R$, ${\rm rank}(A_2)\leq 2R$, ${\rm rank}(A_3)\leq 3R$, and thus ${\rm rank}(\widehat e^{({\rm rem})}(\beta)) \leq 7R$, as stated in the theorem.
\end{proof}
Having expansions for $M_{\widehat \lambda}(\beta)$ and $M_{\widehat f}(\beta)$, we also have expansions for $P_{\widehat \lambda}(\beta)=\mathbb{I}_N-M_{\widehat \lambda}(\beta)$ and $P_{\widehat f}(\beta)=\mathbb{I}_T-M_{\widehat f}(\beta)$. The reason why we give expansions of the projectors and not expansions of $\widehat \lambda(\beta)$ and $\widehat f(\beta)$ directly is that for the latter we would need to specify a normalization, whereas the projectors are independent of any normalization choice. An expansion for $\widehat \lambda(\beta)$ can, for example, be defined by $\widehat \lambda(\beta) = P_{\widehat \lambda}(\beta) \lambda^0$, in which case the normalization of $\widehat \lambda(\beta)$ is implicitly defined by the normalization of $\lambda^0$.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Consistency Proof for Bias and Variance Estimators (Proof of Theorem~\ref{th:biascorrection})}
\label{app:ProofThBias}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
It is convenient to introduce some alternative notation for Definition~\ref{def:estimators} in Section~\ref{sec:BiasCorrection} of the main text.
\begin{paragraph}{\bf Definition}
\it Let $\Gamma: \mathbb{R} \rightarrow \mathbb{R}$ be the truncation kernel defined by $\Gamma(x)=1$ for $|x|\leq 1$, and $\Gamma(x)=0$ otherwise. Let $M$ be a bandwidth parameter that depends on $N$ and $T$. For an $N\times N$ matrix $A$ with elements $A_{ij}$ and a $T\times T$ matrix $B$ with elements $B_{ts}$ we define
\begin{itemize}
\item[(i)] the diagonal truncations $A^{\rm truncD} = {\rm diag}[ (A_{ii})_{i=1,\ldots,N} ]$ and $B^{\rm truncD} = {\rm diag}[ (B_{tt})_{t=1,\ldots,T} ]$.
\item[(ii)] the right-sided kernel truncation of $B$, which is a $T \times T$ matrix $B^{\rm truncR}$ with elements $B^{\rm truncR}_{ts} = \Gamma\left( \frac{s-t} M \right) B_{ts}$ for $t<s$, and $B^{\rm truncR}_{ts} = 0$ otherwise.
\end{itemize}
\end{paragraph}
\begin{lemma}
Let $Z_i$, $i=1,\ldots,n$, be random variables, and let $\gamma \geq 1$ and $B>0$ be finite constants (independent of $n$). Assume $\max_i \, \mathbb{E}_{\cal C} |Z_i|^{\gamma} \leq B$, i.e., the $\gamma$'th moments of the $Z_i$ are finite and uniformly bounded. For $n \rightarrow \infty$ we then have
\begin{align}
\max_i |Z_i| &= {\cal O}_p\left( n^{1/\gamma} \right) \; .
\label{eq:Zbound}
\end{align}
\end{lemma}
\begin{proof}[\bf Proof]
Using Jensen's inequality one obtains $\mathbb{E}_{\cal C} \max_i |Z_i| \leq \left( \mathbb{E}_{\cal C} \max_i |Z_i|^\gamma \right)^{1/\gamma} \leq \left( \mathbb{E}_{\cal C} \sum_{i=1}^n |Z_i|^\gamma \right)^{1/\gamma} \leq \left( n \, \max_i \mathbb{E}_{\cal C} |Z_i|^\gamma \right)^{1/\gamma} \leq n^{1/\gamma} \, B^{1/\gamma}$. Markov's inequality then gives equation \eqref{eq:Zbound}.
\end{proof}
\begin{lemma}
\label{lemma:barZ}
Let
\begin{align*}
\bar Z^{(1)}_{k,t\tau} &= N^{-1/2} \sum_{i=1}^N \left[ e_{it} X_{k,i\tau} - \mathbb{E}_{\cal C} \left( e_{it} X_{k,i\tau} \right) \right] \; , \nonumber \\
\bar Z^{(2)}_{t} &= N^{-1/2} \sum_{i=1}^N \left[ e_{it}^2 - \mathbb{E}_{\cal C} \left( e_{it}^2 \right) \right] \; , \nonumber \\
\bar Z^{(3)}_{i} &= T^{-1/2} \sum_{t=1}^T \left[ e_{it}^2 - \mathbb{E}_{\cal C} \left( e_{it}^2 \right) \right] \; .
\end{align*}
Under Assumption~\ref{ass:A5} we have
\begin{align*}
\mathbb{E}_{\cal C} \left| \bar Z^{(1)}_{k,t\tau} \right|^4 &\leq B \; , \nonumber \\
\mathbb{E}_{\cal C} \left| \bar Z^{(2)}_{t} \right|^4 &\leq B \; , \nonumber \\
\mathbb{E}_{\cal C} \left| \bar Z^{(3)}_{i} \right|^4 &\leq B \; ,
\end{align*}
for some $B>0$, i.e., the conditional fourth moments of $\bar Z^{(1)}_{k,t\tau}$, $\bar Z^{(2)}_{t}$, and $\bar Z^{(3)}_{i}$ are uniformly bounded over $k,t,\tau$, or $i$, respectively.
\end{lemma}
\begin{proof}[\bf Proof]
\# We start with the proof for $\bar Z^{(1)}_{k,t\tau}$. Define $Z^{(1)}_{k,t\tau,i} = e_{it} X_{k,i\tau} - \mathbb{E}_{\cal C} \left( e_{it} X_{k,i\tau} \right)$. By assumption we have finite 8th moments for $e_{it}$ and $X_{k,i\tau}$ uniformly across $k,i,t,\tau$, and thus (using the Cauchy--Schwarz inequality) we have finite 4th moments of $Z^{(1)}_{k,t\tau,i}$ uniformly across $k,i,t,\tau$. For ease of notation we now fix $k,t,\tau$ and write $Z_i=Z^{(1)}_{k,t\tau,i}$. We have $\mathbb{E}_{\cal C}(Z_i)=0$ and $\mathbb{E}_{\cal C}(Z_{i} Z_{j} Z_{k} Z_{l})=0$ if $i \notin \{j,k,l\}$ (and the same holds for permutations of $i,j,k,l$). Using this we compute
\begin{align*}
\mathbb{E}_{\cal C} \left( \sum_{i=1}^N \, Z_i \right)^4 &= \sum_{i,j,k,l=1}^N \, \mathbb{E}_{\cal C}\left( Z_i Z_j Z_k Z_l\right) \nonumber \\
&= 3 \, \sum_{i\neq j} \, \mathbb{E}_{\cal C}\left( Z_i^2 \, Z_j^2\right) + \sum_{i} \mathbb{E}_{\cal C}\left( Z_i^4 \right) \nonumber \\
&= 3 \, \sum_{i,j=1}^N \, \mathbb{E}_{\cal C}\left( Z_i^2\right) \, \mathbb{E}_{\cal C}\left( Z_j^2\right) + \sum_{i=1}^N \, \left\{ \mathbb{E}_{\cal C}\left( Z_i^4 \right) - 3 \left[ \mathbb{E}_{\cal C}\left(Z_i^2\right) \right]^2 \right\} \; .
\end{align*}
Because we argued that $\mathbb{E}_{\cal C}\left( Z_i^4 \right)$ is bounded uniformly, the last equation shows that the conditional fourth moment of $\bar Z^{(1)}_{k,t\tau} = N^{-1/2} \sum_{i=1}^N \, Z^{(1)}_{k,t\tau,i}$ is bounded uniformly across $k,t,\tau$. This is what we wanted to show.
\# The proofs for $\bar Z^{(2)}_{t}$ and $\bar Z^{(3)}_{i}$ are analogous.
\end{proof}
\begin{lemma}
\label{Lemma:Trunc}
For a $T\times T$ matrix $A$ we have
\begin{align*}
\left\| A^{\rm truncR} \right\| \, &\leq \, M \left\| A^{\rm truncR} \right\|_{\max} \, \equiv \, M \, \max_t \, \max_{t<\tau\leq t+M} |A_{t\tau}| \, .
\end{align*}
\end{lemma}
Here, for the bounds on $\tau$ we could write $\max(1,t-M)$ instead of $t-M$, and $\min(T,t+M)$ instead of $t+M$, to guarantee $1\leq \tau \leq T$. Since this would complicate notation, we prefer the convention $A_{t\tau}=0$ for $t<1$ or $\tau<1$, or $t>T$ or $\tau>T$.
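The effect of the truncations, and the operator norm bound of Lemma~\ref{Lemma:Trunc}, can be illustrated with a few lines of code. In the following sketch (Python with numpy; the values of $T$, $M$, and the random matrix are illustrative assumptions) the right-sided truncation keeps only the band $0 < s-t \leq M$, and its operator norm is indeed dominated by $M$ times the largest retained entry:
\begin{verbatim}
# Illustration (not part of the proof) of A^truncD, A^truncR and of
# the bound  ||A^truncR|| <= M * max_{t < tau <= t+M} |A_{t,tau}| .
import numpy as np

rng = np.random.default_rng(2)
T, M = 30, 4
A = rng.standard_normal((T, T))

A_truncD = np.diag(np.diag(A))         # diagonal truncation
A_truncR = np.zeros_like(A)
for t in range(T):
    for s in range(t + 1, min(t + M, T - 1) + 1):
        A_truncR[t, s] = A[t, s]       # keep 0 < s - t <= M only

lhs = np.linalg.norm(A_truncR, 2)      # operator (spectral) norm
rhs = M * np.abs(A_truncR).max()
print(lhs <= rhs, lhs, rhs)
\end{verbatim}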
\begin{proof}[\bf Proof] For the $1$-norm of $A^{\rm truncR}$ we find \begin{align*} \left\| A^{\rm truncR} \right\|_1 &= \max_{t=1\ldots T} \, \sum_{\tau=t+1}^{t+M} \, |A_{t\tau}| \nonumber \\ &\leq M \, \max_{t<\tau\leq t+M} \, |A_{t\tau}| = M \left\| A^{\rm truncR} \right\|_{\max} \; , \end{align*} and analogously we find the same bound for the $\infty$-norm $\left\| A^{\rm truncR} \right\|_\infty$. Applying part (vii) of Lemma~\ref{lemma:inequalities} we therefore also get this bound for the operator norm $\| A^{\rm truncR} \|$. \end{proof} \begin{proof}[\bf Proof of Lemma~\ref{lemma:A1A2}] \# We first show $A_1\equiv (NT)^{-1} \sum_{i=1}^N \sum_{t=1}^T e_{it}^2 \left( {\cal X}_{it} {\cal X}_{it}' - \widehat {\cal X}_{it} \widehat {\cal X}_{it}' \right) = o_p(1)$. Let $B_{1,it} = {\cal X}_{it}- \widehat {\cal X}_{it}$, $B_{2,it} = e_{it}^2 {\cal X}_{it}$, and $B_{3,it} = e_{it}^2 \widehat {\cal X}_{it}$. Note $B_1$, $B_2$, and $B_3$ can either be viewed as $K$-vectors for each pair $(i,t)$, or equivalently as $N\times T$ matrices $B_{1,k}$, $B_{2,k}$, and $B_{3,k}$ for each $k=1,\ldots,K$. We have $A_1 = (NT)^{-1} \sum_i \sum_t \left( B_{1,it} B_{2,it}' + B_{3,it} B_{1,it}' \right)$, or equivalently \begin{align*} A_{1,k_1 k_2} &= \frac 1 {NT} {\rm Tr}\left( B_{1,k_1} B_{3,k_2}' + B_{2,k_1} B_{1,k_2}' \right) \; . \end{align*} Using $\|M_{\widehat \lambda} - M_{\lambda^0}\| = {\cal O}_p(N^{-1/2})$, $\|M_{\widehat f} - M_{f^0}\| = {\cal O}_p(N^{-1/2})$, $\|X_k\| = {\cal O}_p(\sqrt{NT}) = {\cal O}_p(N)$, we find for $B_{1,k} = (M_{\lambda^0} - M_{\widehat \lambda}) X_k M_{f^0} + M_{\widehat \lambda} X_k (M_{f^0} - M_{\widehat f}) $ that $\|B_{1,k}\| = {\cal O}_p(N^{1/2})$. In addition we have ${\rm rank}(B_{1,k}) \leq 4R$. We also have \begin{align*} \| B_{2,k} \|^4 &\leq \| B_{2,k} \|_F^4 \nonumber \\ &= \left( \sum_{i=1}^N \sum_{t=1}^T e_{it}^4 {\cal X}_{k,it}^2 \right)^2 \nonumber \\ &\leq \left( \sum_{i=1}^N \sum_{t=1}^T e_{it}^8 \right) \left( \sum_{i=1}^N \sum_{t=1}^T {\cal X}_{k,it}^4 \right) = {\cal O}_p(NT) \, {\cal O}_p(NT) \, , \end{align*} which implies $\| B_{2,k} \|={\cal O}_p(\sqrt{NT})$, and analogously we find $\| B_{3,k} \|={\cal O}_p(\sqrt{NT})$. Therefore \begin{align*} | A_{1,k_1 k_2} | &\leq \frac {4R} {NT} \left( \| B_{1,k_1} \| \| B_{3,k_2}\| + \|B_{2,k_1}\| \| B_{1,k_2}\| \right) \nonumber \\ &= \frac {4R} {NT} \left( {\cal O}_p(N^{1/2}) {\cal O}_p(\sqrt{NT}) + {\cal O}_p(\sqrt{NT}) {\cal O}_p(N^{1/2}) \right) = o_p(1) \; . \end{align*} This is what we wanted to show. \# Finally, we want to show $A_2 \equiv (NT)^{-1} \sum_{i=1}^N \sum_{t=1}^T \left( e_{it}^2 - \widehat e_{it}^2 \right) \widehat {\cal X}_{it} \widehat {\cal X}_{it}' = o_p(1) $. According to theorem \ref{theorem:expansions} we have $e - \widehat e = C_1 + C_2$, where we defined $C_1 = - \sum_{k=1}^K \left( \widehat \beta_k - \beta^0_k \right) \, X_k $, and $C_2=\sum_{k=1}^K \left( \widehat \beta_k - \beta^0_k \right) \left( P_{\lambda^0} \, X_k \, M_{f^0} + X_k \, P_{f^0} \right) + P_{\lambda^0} \, e \, M_{f^0} + e \, P_{f^0} - \widehat e^{(1)}_e - \widehat e^{({\rm rem})}$, which satisfies $\|C_2\|={\cal O}_p(N^{1/2})$, and ${\rm rank}(C_2) \leq 11 R$ (actually, one can easily prove $\leq 5R$, but this does not follow from theorem \ref{theorem:expansions}). 
Using this notation we have \begin{align*} A_2 &= \frac 1 {NT} \sum_{i=1}^N \sum_{t=1}^T (e_{it}+\widehat e_{it}) (C_{1,it} + C_{2,it}) \widehat {\cal X}_{it} \widehat {\cal X}_{it}' \; , \end{align*} which can also be written as \begin{align*} A_{2,k_1 k_2} &= - \,\sum_{k_3=1}^K \left( \widehat \beta_{k_3} - \beta^0_{k_3} \right) \left( C_{5,k_1 k_2 k_3} + C_{6,k_1 k_2 k_3} \right) + \frac 1 {NT} {\rm Tr}\left( C_2 \, C_{3,k_1 k_2} \right) + \frac 1 {NT} {\rm Tr}\left( C_2 \, C_{4,k_1 k_2} \right) \; , \end{align*} where we defined \begin{align*} C_{3,k_1 k_2,it} &= e_{it} \widehat {\cal X}_{k_1,it} \widehat {\cal X}_{k_2,it} \; , \nonumber \\ C_{4,k_1 k_2,it} &=\widehat e_{it} \widehat {\cal X}_{k_1,it} \widehat {\cal X}_{k_2,it} \; , \nonumber \\ C_{5,k_1 k_2 k_3} &= \frac 1 {NT} \sum_{i=1}^N \sum_{t=1}^T \, e_{it} \widehat {\cal X}_{k_1,it} \widehat {\cal X}_{k_2,it} X_{k_3,it} \; , \nonumber \\ C_{6,k_1 k_2 k_3} &=\frac 1 {NT} \sum_{i=1}^N \sum_{t=1}^T \, \widehat e_{it} \widehat {\cal X}_{k_1,it} \widehat {\cal X}_{k_2,it} X_{k_3,it} \; . \end{align*} Again, because we have uniformly bounded $8$th moments of $e_{it}$ and $X_{k,it}$, we find \begin{align*} \| C_{3,k_1 k_2} \|^4 &\leq \| C_{3,k_1 k_2} \|_F^4 \nonumber \\ &= \left( \sum_{i=1}^N \sum_{t=1}^T e_{it}^2 \widehat {\cal X}_{k_1,it}^2 \widehat {\cal X}_{k_2,it}^2 \right)^2 \nonumber \\ &\leq \left( \sum_{i=1}^N \sum_{t=1}^T e_{it}^4 \right) \left( \sum_{i=1}^N \sum_{t=1}^T \widehat {\cal X}_{k_1,it}^4 \widehat {\cal X}_{k_2,it}^4 \right) \nonumber \\ &= {\cal O}_p(N^2 T^2) \; , \end{align*} i.e., $\| C_{3,k_1 k_2} \|={\cal O}_p(\sqrt{NT})$. Furthermore \begin{align*} \| C_{4,k_1 k_2} \|^2 &\leq \| C_{4,k_1 k_2} \|_F^2 \nonumber \\ &= \sum_{i=1}^N \sum_{t=1}^T \widehat e_{it}^2 \widehat {\cal X}_{k_1,it}^2 \widehat {\cal X}_{k_2,it}^2 \nonumber \\ &\leq \left( \sum_{i=1}^N \sum_{t=1}^T \widehat e_{it}^2 \right) \max_{i=1\ldots N} \max_{t=1\ldots T} \left( \widehat {\cal X}_{k_1,it}^2 \widehat {\cal X}_{k_2,it}^2 \right) \nonumber \\ &\leq \left( \sum_{i=1}^N \sum_{t=1}^T e_{it}^2 \right) \max_{i=1\ldots N} \max_{t=1\ldots T} \left( \widehat {\cal X}_{k_1,it}^2 \widehat {\cal X}_{k_2,it}^2 \right) \nonumber \\ &= {\cal O}_p(NT) \, {\cal O}_p\left((NT)^{4/(8+\epsilon)}\right) = o_p\left((NT)^{3/2}\right) \; , \end{align*} i.e., $\| C_{4,k_1 k_2} \| = o_p\left((NT)^{3/4}\right)$. Here we used the assumption that $X_k$ has uniformly bounded moments of order $8+\epsilon$ for some $\epsilon>0$. We also used $\sum_{i=1}^N \sum_{t=1}^T \widehat e_{it}^2 \leq \sum_{i=1}^N \sum_{t=1}^T e_{it}^2$. For $C_5$ we find \begin{align*} C_{5,k_1 k_2 k_3}^2 &\leq \left( \frac 1 {NT} \sum_{i=1}^N \sum_{t=1}^T \, e_{it}^2 \right) \left( \frac 1 {NT} \sum_{i=1}^N \sum_{t=1}^T \widehat {\cal X}_{k_1,it}^2 \widehat {\cal X}_{k_2,it}^2 X_{k_3,it}^2 \right) \nonumber \\ &= {\cal O}_p(1) \; , \end{align*} i.e., $C_{5,k_1 k_2 k_3} = {\cal O}_p(1)$, and analogously $C_{6,k_1 k_2 k_3} = {\cal O}_p(1)$, because $\sum_{i=1}^N \sum_{t=1}^T \widehat e_{it}^2 \leq \sum_{i=1}^N \sum_{t=1}^T e_{it}^2$. Using these results we obtain \begin{align*} | A_{2,k_1 k_2} | &\leq \sum_{k_3=1}^K \left| \widehat \beta_{k_3} - \beta^0_{k_3} \right| \left| C_{5,k_1 k_2 k_3} + C_{6,k_1 k_2 k_3} \right| + \frac {11 R} {NT} \|C_2\| \|C_{3,k_1 k_2} \| + \frac {11 R} {NT} \| C_2\| \| C_{4,k_1 k_2} \| \nonumber \\ &= {\cal O}_p((NT)^{-1/2}) \, {\cal O}_p(1) + \frac {11 R} {NT} \, {\cal O}_p(N^{1/2}) \, {\cal O}_p(\sqrt{NT}) + \frac {11 R} {NT} \, {\cal O}_p(N^{1/2}) \, o_p((NT)^{3/4}) = o_p(1) \; . \end{align*} This is what we wanted to show.
\end{proof} Recall that the truncation kernel $\Gamma(\cdot)$ is defined by $\Gamma(x)=1$ for $|x|\leq 1$ and $\Gamma(x)=0$ otherwise. Without loss of generality we assume in the following that the bandwidth parameter $M$ is a positive integer (without this assumption one needs to replace $M$ everywhere below by the largest integer contained in $M$, but nothing else changes).
\begin{proof}[\bf Proof of Lemma \ref{lemma:lambdafINV}] By Lemma~\ref{lemma:Pfhat} we know that $P_{\widehat f}$ is asymptotically close to $P_{f^0}$, and therefore ${\rm rank}(P_{\widehat f}P_{f^0})={\rm rank}(P_{f^0}P_{f^0})=R$, i.e., ${\rm rank}(P_{\widehat f}f^0)=R$ asymptotically. We can therefore write $\widehat f = P_{\widehat f} f^0 H$, where $H=H_{NT}$ is a non-singular $R\times R$ matrix. We now want to show $\|H\|={\cal O}_p(1)$ and $\|H^{-1}\|={\cal O}_p(1)$. Because of our normalization of $\widehat f$ and $f^0$ we have $H=(\widehat f' P_{\widehat f} f^0/T)^{-1}=(\widehat f' f^0/T)^{-1}$, and therefore $\|H^{-1}\|\leq \|\widehat f\| \|f^0\| /T ={\cal O}_p(1)$. We also have $\widehat f = f^0 H + (P_{\widehat f}-P_{f^0}) f^0 H$, and thus $H=f^{0\prime} \widehat f/T - f^{0\prime} (P_{\widehat f}-P_{f^0}) f^0 H /T$, i.e., $\|H\| \leq {\cal O}_p(1) + \|H\| {\cal O}_p\left(T^{-1/2}\right)$, which shows $\|H\|={\cal O}_p(1)$. Note that all the following results only require $\|H\|={\cal O}_p(1)$ and $\|H^{-1}\|={\cal O}_p(1)$, but apart from that are independent of the choice of normalization. The advantage of expressing $\widehat f$ in terms of $P_{\widehat f}$ as above is that the result $\left\| P_{\widehat f} - P_{f^0} \right\| = {\cal O}_p\left(T^{-1/2}\right)$ of Lemma~\ref{lemma:Pfhat} immediately implies \begin{align*} \left\| \widehat f - f^0 \, H \right\| &= {\cal O}_p\left(1\right) \; . \end{align*} The first order condition with respect to $\lambda$ in the minimization of the first line in equation \eqref{LNT123} reads \begin{align} \widehat \lambda \, \widehat f' \widehat f &= \left(Y-\sum_{k=1}^{K} \widehat \beta_{k} X_{k} \right) \widehat f \; , \label{hatf_close} \end{align} which yields \begin{align*} \widehat \lambda &= \left[ \lambda^0 f^{0\prime} - \sum_{k=1}^{K} \left( \widehat \beta_{k} - \beta^0_k \right) X_{k} + e \right] \widehat f \left(\widehat f' \widehat f\right)^{-1} \nonumber \\ &= \left[ \lambda^0 f^{0\prime} + \sum_{k=1}^{K} \left( \beta^0_k - \widehat \beta_{k} \right) X_{k} + e\right] P_{\widehat f} f^0 \left(f^{0\prime} P_{\widehat f} f^0\right)^{-1} \, \left(H'\right)^{-1} \nonumber \\ &= \lambda^0 \, \left(H'\right)^{-1} + \lambda^0 f^{0\prime} \left( P_{\widehat f}-P_{f^0} \right) f^0 \left(f^{0\prime} P_{\widehat f} f^0\right)^{-1} \, \left(H'\right)^{-1} \nonumber \\ & \qquad \qquad \qquad + \lambda^0 f^{0\prime} f^0 \left[ \left(f^{0\prime} P_{\widehat f} f^0\right)^{-1} - \left(f^{0\prime} f^0\right)^{-1} \right] \, \left(H'\right)^{-1} \nonumber \\ & \qquad \qquad \qquad + \left[ \sum_{k=1}^{K} \left( \beta^0_k - \widehat \beta_{k} \right) X_{k} + e\right] P_{\widehat f} f^0 \left(f^{0\prime} P_{\widehat f} f^0\right)^{-1} \, \left(H'\right)^{-1} \; . \end{align*} We have $\left(f^{0\prime} P_{\widehat f} f^0/T\right)^{-1} - \left(f^{0\prime} f^0/T\right)^{-1}={\cal O}_p(T^{-1/2})$, because $\left\| P_{\widehat f} - P_{f^0} \right\| = {\cal O}_p\left(T^{-1/2}\right)$ and $f^{0\prime} f^0/T$ by assumption converges to a positive definite matrix (given our particular choice of normalization it simply equals the identity matrix $\mathbb{I}_R$).
In addition, we have $\|e\|={\cal O}_p(\sqrt{T})$, $\|X_k\|={\cal O}_p(\sqrt{NT})$, and by Corollary~\ref{lemma:sqrtNTcons} also $\|\widehat \beta - \beta^0\|={\cal O}_p(1/\sqrt{NT})$. Therefore \begin{align} \left\| \widehat \lambda - \lambda^0 \, \left(H'\right)^{-1} \right\| &= {\cal O}_p\left(1\right) \; , \label{hatlambda_close} \end{align} which is what we wanted to prove. Next, we want to show \begin{align} \label{lambdafsquare} \left\| \left( \frac{\widehat \lambda^{\prime} \, \widehat \lambda} N \right)^{-1} - \left( \frac{ \left(H\right)^{-1} \,\lambda^{0\prime} \, \lambda^0 \, \left(H'\right)^{-1}} N \right)^{-1} \right\| = {\cal O}_p\left(N^{-1/2}\right) \; , \nonumber \\ \left\| \left( \frac{\widehat f^{\prime} \, \widehat f} T \right)^{-1} - \left( \frac{H' \,f^{0\prime} \, f^0 \, H} T \right)^{-1} \right\| = {\cal O}_p\left(T^{-1/2}\right) \; . \end{align} Let $A=N^{-1} \, \widehat \lambda^{\prime} \, \widehat \lambda$ and $B=N^{-1} \, \left(H\right)^{-1} \, \lambda^{0\prime} \, \lambda^0 \, \left(H'\right)^{-1}$. Using \eqref{hatlambda_close} we find \begin{align*} \| A-B \| &= \frac 1 {2N} \left\| \left[ \widehat \lambda^{\prime} + \left(H\right)^{-1} \, \lambda^{0\prime} \right] \left[ \widehat \lambda - \lambda^0 \, \left(H'\right)^{-1} \right] +\left[ \widehat \lambda^{\prime} - \left(H\right)^{-1} \, \lambda^{0\prime} \right] \left[ \widehat \lambda + \lambda^0 \, \left(H'\right)^{-1} \right] \right\| \nonumber \\ &\leq N^{-1} \, {\cal O}_p(N^{1/2}) \, {\cal O}_p(1) = {\cal O}_p \left( N^{-1/2} \right) \; . \end{align*} By Assumption~\ref{ass:A1} we know \begin{align*} \left\| \left( \frac{\lambda^{0\prime} \, \lambda^0} N \right)^{-1} \right\| &= {\cal O}_p(1) \; , \end{align*} and thus also $\left\|B^{-1}\right\|= {\cal O}_p(1)$, and therefore $\left\|A^{-1}\right\|= {\cal O}_p(1)$ (using $\| A-B \|=o_p(1)$ and applying Weyl's inequality to the smallest eigenvalue of $B$). Because $A^{-1} - B^{-1} = A^{-1} (B-A) B^{-1}$ we find \begin{align*} \left\| A^{-1} - B^{-1} \right\| &\leq \left\|A^{-1}\right\| \, \left\|B^{-1}\right\| \, \left\|A-B\right\| \nonumber \\ &= {\cal O}_p\left( N^{-1/2} \right) \; . \end{align*} Thus, we have shown the first statement of \eqref{lambdafsquare}, and analogously one can show the second one. Combining \eqref{hatlambda_close}, \eqref{hatf_close} and \eqref{lambdafsquare} we obtain \begin{align*} & \left\| \frac{\widehat \lambda}{\sqrt{N}} \, \left( \frac{\widehat \lambda^{\prime}\widehat\lambda}{N} \right)^{-1} \, \left(\frac{\widehat f^{\prime}\widehat f}T\right)^{-1} \, \frac{\widehat f^{\prime}}{\sqrt{T}} - \frac{\lambda^0}{\sqrt{N}} \, \left(\frac{\lambda^{0\prime}\lambda^0}N\right)^{-1} \, \left(\frac{f^{0\prime}f^0}T\right)^{-1} \, \frac{f^{0\prime}} {\sqrt{T}} \right\| \nonumber \\ & = \left\| \frac{\widehat \lambda}{\sqrt{N}} \, \left( \frac{\widehat \lambda^{\prime}\widehat\lambda}{N} \right)^{-1} \, \left(\frac{\widehat f^{\prime}\widehat f}T\right)^{-1} \, \frac{\widehat f^{\prime}}{\sqrt{T}} - \frac{\lambda^0 \left(H'\right)^{-1}}{\sqrt{N}} \, \left(\frac{\left(H\right)^{-1}\lambda^{0\prime}\lambda^0 \left(H'\right)^{-1}}N\right)^{-1} \, \left(\frac{H' f^{0\prime}f^0 H}T\right)^{-1} \, \frac{H' f^{0\prime}} {\sqrt{T}} \right\| \nonumber \\ & \qquad \qquad = {\cal O}_p\left( N^{-1/2} \right) \; , \end{align*} which is equivalent to the statement in the lemma.
Note also that $\widehat \lambda \, (\widehat \lambda^{\prime}\widehat\lambda)^{-1} \, (\widehat f^{\prime}\widehat f)^{-1} \, \widehat f^{\prime}$ is independent of $H$, i.e., independent of the choice of normalization. \end{proof}
\begin{proof}[\bf Proof of Lemma~\ref{lemma:exp}] \# Part A of the proof: We start by showing \begin{align} N^{-1} \, \left\| \mathbb{E}_{\cal C}\left[ e' X_k - \left( e' X_k \right)^{\rm truncR} \right] \right\| &= o_p(1) \; . \label{exp_proof_part1} \end{align} Let $A=e' X_k$ and $B=A-A^{\rm truncR}$. By definition of the right-sided truncation (using the truncation kernel $\Gamma(\cdot)$ defined above) we have $B_{t\tau}=0$ for $t<\tau\leq t+M$ and $B_{t\tau}=A_{t\tau}$ otherwise. By Assumption~\ref{ass:A5} we have $\mathbb{E}_{\cal C}(A_{t\tau})=0$ for $t \geq \tau$. For $t<\tau$ we have $\mathbb{E}_{\cal C}(A_{t\tau}) = \sum_{i=1}^N \mathbb{E}_{\cal C}(e_{it} X_{k,i\tau})$. We thus have $\mathbb{E}_{\cal C}(B_{t\tau})=0$ for $\tau\leq t+M$, and $\mathbb{E}_{\cal C} (B_{t\tau}) = \sum_{i=1}^N \mathbb{E}_{\cal C}(e_{it} X_{k,i\tau}) $ for $\tau > t+M$. Therefore \begin{align*} \left\| \mathbb{E}_{\cal C}(B) \right\|_1 &= \max_{t=1\ldots T} \, \sum_{\tau=1}^T |\mathbb{E}_{\cal C}(B_{t\tau})| \nonumber \\ &\leq \max_{t=1\ldots T} \, \sum_{\tau=t+M+1}^T \, \left| \sum_{i=1}^N \mathbb{E}_{\cal C}(e_{it} X_{k,i\tau}) \right| \leq N \max_{t=1\ldots T} \, \sum_{\tau=t+M+1}^T c \, (\tau - t)^{- (1+ \epsilon)} = o_p(N) \; , \end{align*} where the bound $\left| \mathbb{E}_{\cal C}(e_{it} X_{k,i\tau}) \right| \leq c \, (\tau-t)^{-(1+\epsilon)}$ holds by Assumption~\ref{ass:A5}, and we used $M\rightarrow \infty$, so that the tail sum $\sum_{s=M+1}^\infty s^{-(1+\epsilon)}$ vanishes asymptotically. Analogously we can show $\left\| \mathbb{E}_{\cal C}(B) \right\|_\infty = o_p(N)$. Using part (vii) of Lemma~\ref{lemma:inequalities} we therefore also find $\left\| \mathbb{E}_{\cal C}(B) \right\| = o_p(N)$, which is equivalent to equation \eqref{exp_proof_part1}, the statement we wanted to show in this part of the proof. Analogously we can show \begin{align*} N^{-1} \, \left\| \mathbb{E}_{\cal C}\left[ e' e - \left( e' e \right)^{\rm truncD} \right] \right\| &= o_p(1) \; , \nonumber \\ T^{-1} \, \left\| \mathbb{E}_{\cal C}\left[ e e' - \left( e e' \right)^{\rm truncD} \right] \right\| &= o_p(1) \; . \end{align*} \# Part B of the proof: Next, we want to show \begin{align} N^{-1} \, \left\| \left[ e' X_k \, - \, \mathbb{E}_{\cal C}\left(e' X_k\right) \right]^{\rm truncR} \right\| &= o_p(1) \; . \label{exp_proof_part2} \end{align} Using Lemma~\ref{Lemma:Trunc} we have \begin{align*} N^{-1} \left\| \left[ e' X_k \, - \, \mathbb{E}_{\cal C}\left(e' X_k\right) \right]^{\rm truncR} \right\| &\leq M \, \max_t \, \max_{t<\tau\leq t+M} \, N^{-1} \, \left| e_t' X_{k,\tau} - \mathbb{E}_{\cal C} \left( e_t' X_{k,\tau} \right) \right| \nonumber \\ &\leq M \, \max_t \, \max_{t<\tau\leq t+M} \, N^{-1} \, \left| \sum_{i=1}^N \left[ e_{it} X_{k,i\tau} - \mathbb{E}_{\cal C} \left( e_{it} X_{k,i\tau} \right) \right] \right| \nonumber \\ &\leq M \, N^{-1/2} \, \max_t \, \max_{t<\tau\leq t+M} \, \left| \bar Z^{(1)}_{k,t\tau} \right| . \end{align*} According to Lemma~\ref{lemma:barZ} we know that $\mathbb{E}_{\cal C} \left| \bar Z^{(1)}_{k,t\tau} \right|^4$ is bounded uniformly across $t$ and $\tau$. Applying Lemma~\ref{lemma:maxRV} to these at most $MT$ random variables we therefore find $\max_t \, \max_{t<\tau\leq t+M} \left| \bar Z^{(1)}_{k,t\tau} \right| = {\cal O}_p((MT)^{1/4})$. Thus we have \begin{align*} M \, N^{-1/2} \, \max_t \, \max_{t<\tau\leq t+M} \, \left| \bar Z^{(1)}_{k,t\tau} \right| &= {\cal O}_p\left(M \, N^{-1/2} \, (MT)^{1/4}\right) \, = \, o_p(1) \; . \end{align*} Here we used $M \, N^{-1/2} \, (MT)^{1/4} = (M^5/T)^{1/4} \, (T/N)^{1/2}$, which converges to zero because $M^5/T \rightarrow 0$ and $N$ and $T$ grow at the same rate.
Analogously we can show \begin{align*} N^{-1} \, \left\| \left[ e' e \, - \, \mathbb{E}_{\cal C} \left(e' e\right) \right]^{\rm truncD} \right\| &= o_p(1) \; , \nonumber \\ T^{-1} \, \left\| \left[ e e' \, - \, \mathbb{E}_{\cal C} \left(e e'\right) \right]^{\rm truncD} \right\| &= o_p(1) \; . \end{align*} \# Part C of the proof: Finally, we want to show \begin{align} N^{-1} \, \left\| \left[ e' X_k \, - \, \widehat e' \, X_k \right]^{\rm truncR} \right\| &= o_p(1) \; . \label{exp_proof_part3} \end{align} According to Theorem~\ref{theorem:expansions} we have $\widehat e =M_{\lambda^0} e M_{f^0} + e_{\rm rem}$, where $e_{\rm rem} \equiv \widehat e^{(1)}_e - \sum_{k=1}^K \left( \widehat \beta_k - \beta^0_k \right) \widehat e^{(1)}_k + \widehat e^{({\rm rem})}$. We then have \begin{align*} & N^{-1} \, \left\| \left[ e' X_k \, - \, \widehat e' \, X_k \right]^{\rm truncR} \right\| \\ &\leq N^{-1} \, \left\| \left[ e_{\rm rem}' X_k \right]^{\rm truncR} \right\| + N^{-1} \, \left\| \left[ P_{f^0} e' M_{\lambda^0} X_k \right]^{\rm truncR} \right\| + N^{-1} \, \left\| \left[ e' P_{\lambda^0} X_k \right]^{\rm truncR} \right\| . \end{align*} Using Corollary~\ref{lemma:sqrtNTcons} we find that the remainder term satisfies $\| e_{\rm rem} \| = {\cal O}_p(1)$. Using Lemma~\ref{Lemma:Trunc} we find \begin{align*} N^{-1} \, \left\| \left[ e'_{\rm rem} \, X_k \right]^{\rm truncR} \right\| &\leq \frac M N \, \max_{t,\tau} \, \left| e'_{{\rm rem},t} \, X_{k,\tau} \right| \nonumber \\ &\leq \frac M N \, \max_{t,\tau} \, \| e_{{\rm rem},t} \| \, \| X_{k,\tau} \| \nonumber \\ &\leq \frac M N \, \| e_{\rm rem} \| \, \max_{\tau} \| X_{k,\tau} \| \nonumber \\ &\leq \frac M N \, {\cal O}_p(1) \, {\cal O}_p(N^{1/2} T^{1/8}) = o_p(1) \; , \end{align*} where we used the fact that the norm of each column $ e_{{\rm rem},t}$ is smaller than the operator norm of the whole matrix $ e_{\rm rem}$. In addition, we used Lemma~\ref{lemma:maxRV} and the fact that $N^{-1/2} \, \| X_{k,\tau} \| = \sqrt{ N^{-1} \sum_{i=1}^N X_{k,i\tau}^2 }$ has finite 8th moment to show $\max_{\tau} \| X_{k,\tau} \| = {\cal O}_p(N^{1/2} T^{1/8})$. Using again Lemma~\ref{Lemma:Trunc} we find \begin{align*} N^{-1} \, \left\| \left[ P_{f^0} e' M_{\lambda^0} X_k \right]^{\rm truncR} \right\| &\leq N^{-1} \, M \, \max_{t,\tau=1\ldots T} \, \left| f^0_t \, (f^{0\prime} \, f^0)^{-1} \, f^{0\prime} \, e' M_{\lambda^0} X_{k,\tau} \right| \nonumber \\ &\leq N^{-1} \, M \, \|e\| \, \|f^0\| \, \left\|(f^{0\prime} \, f^0)^{-1} \right\| \, \max_t \, \| f^0_t \| \, \max_{\tau} \| X_{k,\tau} \| \nonumber \\ &= N^{-1} \, M \, {\cal O}_p(N^{1/2}) \, {\cal O}_p(T^{1/2}) \, {\cal O}_p(T^{-1}) \, {\cal O}_p(T^{1/8}) \, {\cal O}_p(N^{1/2} T^{1/8}) = o_p(1) \; , \end{align*} where $\max_t \| f^0_t \| = {\cal O}_p(T^{1/8})$ follows from Lemma~\ref{lemma:maxRV} and the moment conditions on $f^0$, in the same way as the bound on $\max_{\tau} \| X_{k,\tau} \|$. Furthermore, \begin{align*} N^{-1} \, \left\| \left[ e' P_{\lambda^0} X_k \right]^{\rm truncR} \right\| &\leq N^{-1/2} \, M \, \max_{t =1\ldots T} \left\| N^{-1/2} \sum_i e_{it} \lambda^0_i \right\| \, \left\| (N^{-1} \lambda^{0\prime} \, \lambda^0)^{-1} \right\| \, \max_{\tau =1\ldots T} \left\| N^{-1} \sum_j \lambda^{0}_j X_{k,j\tau} \right\| \\ &= N^{-1/2} \, M \, {\cal O}_p(T^{1/8}) \, {\cal O}_p(1) \, {\cal O}_p(T^{1/8}) = o_p(1) \; . \end{align*} Thus, we proved equation \eqref{exp_proof_part3}. Analogously we obtain \begin{align*} N^{-1} \, \left\| \left[ e' e \, - \, \widehat e' \, \widehat e \right]^{\rm truncD} \right\| &= o_p(1) \; , \nonumber \\ T^{-1} \, \left\| \left[ e e' \, - \, \widehat e \, \widehat e' \right]^{\rm truncD} \right\| &= o_p(1) \; .
\end{align*} \# Combining \eqref{exp_proof_part1}, \eqref{exp_proof_part2}, and \eqref{exp_proof_part3}, we obtain $N^{-1} \, \left\| \mathbb{E}_{\cal C}(e' X_k ) - \left( \widehat e' \, X_k \right)^{\rm truncR} \right\| = o_p(1)$. The proof of the other two statements of the lemma is analogous. \end{proof}
\begin{proof}[\bf Proof of Lemma~\ref{lemma:normsTrunc}] Using Theorem~\ref{theorem:expansions} and Corollary~\ref{lemma:sqrtNTcons} we find $\| \widehat e \| = {\cal O}_p(N^{1/2})$. Applying Lemma~\ref{Lemma:Trunc} we therefore find \begin{align*} N^{-1} \, \left\| \left( \widehat e' \, X_k \right)^{\rm truncR} \right\| &\leq \frac M N \, \max_{t,\tau} \, \left| \widehat e'_t \, X_{k,\tau} \right| \nonumber \\ &\leq \frac M N \, \max_{t,\tau} \, \| \widehat e_{t} \| \, \| X_{k,\tau} \| \nonumber \\ &\leq \frac M N \, \| \widehat e \| \, \max_{\tau} \| X_{k,\tau} \| \nonumber \\ &\leq \frac M N \, {\cal O}_p(N^{1/2}) \, {\cal O}_p(N^{1/2} T^{1/8}) = {\cal O}_p(M T^{1/8}) \; , \end{align*} where we used the result $\max_{\tau} \| X_{k,\tau} \| = {\cal O}_p(N^{1/2} T^{1/8})$ that was already obtained in the proof of Lemma~\ref{lemma:exp}. The proofs of statements (ii) and (iii) are analogous. \end{proof}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Proofs for Section \ref{sec:testing} (Testing)}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\begin{proof}[\bf Proof of Theorem \ref{th:gradient}] Using the expansion of $L_{NT}(\beta)$ in Lemma S.1 of the supplementary material of Moon and Weidner~\cite*{MoonWeidner2015} we find for the derivative (the sign convention $\epsilon_k=\beta^0_k - \beta_k$ results in the minus sign below) \begin{align*} \frac{\partial L_{NT}}{\partial \beta_k} &= \, - \, \frac{1} {NT} \, \sum_{g=2}^\infty \, g \, \sum_{\kappa_1=0}^K \, \sum_{\kappa_2=0}^K \, \ldots \sum_{\kappa_{g-1}=0}^K \, \epsilon_{\kappa_1} \, \epsilon_{\kappa_2} \, \ldots \, \epsilon_{\kappa_{g-1}} \, L^{(g)}\left(\lambda^0,\, f^0,\, X_{k},\, X_{\kappa_1}, \ldots ,X_{\kappa_{g-1}}\right) \nonumber \\ &= \left[ 2 W_{NT} (\beta-\beta^0) \right]_k - \, \frac 2 {\sqrt{NT}} C_{NT,k} + \frac 1 {NT} \nabla R_{1,NT,k} + \frac 1 {NT} \nabla R_{2,NT,k} \; , \end{align*} where \begin{align*} W_{NT,k_1 k_2} &= \frac{1} {NT} \, L^{(2)}\left(\lambda^0,\, f^0,\, X_{k_1},\, X_{k_2} \right) \; , \nonumber \\ C_{NT,k} &= \frac 1 {2\sqrt{NT}} \, \sum_{g=2}^{G_e} \, g \, (\epsilon_{0})^{g-1} \, L^{(g)}\left(\lambda^0,\, f^0,\, X_{k},\, X_{0}, \ldots ,X_{0}\right) \nonumber \\ &= \sum_{g=2}^{G_e} \, \frac g {2\sqrt{NT}} \, \, L^{(g)}\left(\lambda^0,\, f^0,\, X_{k},\, e, \ldots ,e\right) \; , \end{align*} and \begin{align*} \nabla R_{1,NT,k} &= \, - \, \sum_{g=G_e+1}^\infty \, g \, (\epsilon_{0})^{g-1} \, L^{(g)}\left(\lambda^0,\, f^0,\, X_{k},\, X_{0}, \ldots ,X_{0}\right) \; , \nonumber \\ &= \, - \, \sum_{g=G_e+1}^\infty \, g \, \, L^{(g)}\left(\lambda^0,\, f^0,\, X_{k},\, e, \ldots ,e\right) \; , \nonumber \\ \nabla R_{2,NT,k} &= \, - \, \sum_{g=3}^\infty \, g \, \sum_{r=1}^{g-1} \, {g-1 \choose r} \sum_{k_1=1}^K \, \ldots \sum_{{k_r}=1}^K \, \epsilon_{k_1} \, \ldots \,\epsilon_{k_r} \, (\epsilon_0)^{g-r-1} \, \nonumber \\ & \qquad \qquad \qquad \qquad \qquad \qquad \, L^{(g)}\left(\lambda^0,\, f^0,\, X_{k},\, X_{k_1}, \ldots ,X_{k_r},X_0,\ldots,X_0\right)
\nonumber \\ &= \, - \, \sum_{g=3}^\infty \, g \, \sum_{r=1}^{g-1} \, {g-1 \choose r} \sum_{k_1=1}^K \, \ldots \sum_{{k_r}=1}^K \, \,(\beta^0_{k_1}-\beta_{k_1}) \, \ldots \, (\beta^0_{k_r}-\beta_{k_r}) \nonumber \\ & \qquad \qquad \qquad \qquad \qquad \qquad \, L^{(g)}\left(\lambda^0,\, f^0,\, X_{k},\, X_{k_1}, \ldots ,X_{k_r},e,\ldots,e\right) \; . \end{align*} The above expressions for $W_{NT}$ and $C_{NT}$ are equivalent to their definitions given in Theorem~\ref{th:ass_expand}. Using the bound on $L^{(g)}$ and ${n \choose k} \leq 4^n$ we find \begin{align*} |\nabla R_{1,NT,k}| &\leq c_0 \, NT \, \frac{\|X_k\|}{\sqrt{NT}} \, \sum_{g=G_e+1}^\infty \, g^2 \, \left(\frac{c_1 \|e\|}{\sqrt{NT}} \right)^{g-1} \nonumber \\ &\leq 2 \, c_0 \, (1+G_e)^2 \, NT \, \frac{\|X_k\|}{\sqrt{NT}} \, \left(\frac{c_1 \|e\|}{\sqrt{NT}} \right)^{G_e} \, \left[1-\left(\frac{c_1 \|e\|}{\sqrt{NT}} \right)\right]^{-3} = o_p(\sqrt{NT}) \; , \nonumber \\ |\nabla R_{2,NT,k}| &\leq c_0 \, NT \, \frac{\|X_{k}\|}{\sqrt{NT}} \, \sum_{g=3}^\infty \, g^2 \, \sum_{r=1}^{g-1} \, {g-1 \choose r} \, c_1^{g-1} \, \left( \sum_{\widetilde k=1}^K |\beta_{\widetilde k}-\beta^0_{\widetilde k}| \frac{\|X_{\widetilde k}\|}{\sqrt{NT}} \right) \nonumber \\ & \qquad\qquad\qquad\qquad\qquad\qquad\qquad\qquad \times \left( \sum_{\widetilde k=1}^K |\beta_{\widetilde k}-\beta^0_{\widetilde k}| \frac{\|X_{\widetilde k}\|}{\sqrt{NT}} + \frac{\|e\|}{\sqrt{NT}} \right)^{g-2} \nonumber \\ &\leq c_0 \, NT \, \frac{\|X_{k}\|}{\sqrt{NT}} \, \sum_{g=3}^\infty \, g^3 \, (4c_1)^{g-1} \, \left( \sum_{\widetilde k=1}^K |\beta_{\widetilde k}-\beta^0_{\widetilde k}| \frac{\|X_{\widetilde k}\|}{\sqrt{NT}} \right) \left( \sum_{\widetilde k=1}^K |\beta_{\widetilde k}-\beta^0_{\widetilde k}| \frac{\|X_{\widetilde k}\|}{\sqrt{NT}} + \frac{\|e\|}{\sqrt{NT}} \right)^{g-2} \nonumber \\ &\leq c_2 \, NT \, \frac{\|X_{k}\|}{\sqrt{NT}} \, \left( \sum_{\widetilde k=1}^K |\beta_{\widetilde k}-\beta^0_{\widetilde k}| \frac{\|X_{\widetilde k}\|}{\sqrt{NT}} \right) \left( \sum_{\widetilde k=1}^K |\beta_{\widetilde k}-\beta^0_{\widetilde k}| \frac{\|X_{\widetilde k}\|}{\sqrt{NT}} + \frac{\|e\|}{\sqrt{NT}} \right) \; , \end{align*} where $c_0=8 R d_{\max}(\lambda^0,f^0)/2$ and $c_1=16 d_{\max}(\lambda^0,f^0)/d_{\min}^2(\lambda^0,f^0)$ both converge to constants as $N,T \rightarrow \infty$, the very last inequality holds only if $4 c_1 \left( \sum_{\widetilde k=1}^K |\beta_{\widetilde k}-\beta^0_{\widetilde k}| \frac{\|X_{\widetilde k}\|}{\sqrt{NT}} + \frac{\|e\|}{\sqrt{NT}} \right)<1$, and $c_2>0$ is an appropriate positive constant. To show $\nabla R_{1,NT,k}=o_p(\sqrt{NT})$ we used Assumption~\ref{ass:A3}$^*$. From the above inequalities we find, for any sequence $\eta_{NT} \rightarrow 0$, \begin{align*} \sup_{\{\beta :\left\| \beta -\beta^{0} \right\| \leq \eta_{NT}\}} \frac{ \left\| \nabla R_{1,NT}(\beta) \right\| } { \sqrt{NT} } = o_{p}\left( 1 \right) , \nonumber \\ \sup_{\{\beta :\left\| \beta -\beta^{0} \right\| \leq \eta_{NT}\}} \frac{ \left\| \nabla R_{2,NT}(\beta) \right\| } { NT \, \left\| \beta -\beta^{0} \right\| } = o_{p}\left( 1 \right) . \end{align*} Thus $\nabla R_{NT}(\beta)=\nabla R_{1,NT}(\beta)+\nabla R_{2,NT}(\beta)$ satisfies the bound in the theorem. \end{proof}
\begin{proof}[\bf Proof of Theorem \ref{th:testing}] Using Theorem~\ref{th:limdis} it is straightforward to show that $WD_{NT}^*$ has the limiting distribution $\chi^2_r$. For the LR test we have to show that the estimator $\widehat c = (NT)^{-1} {\rm Tr} (\widehat e(\widehat \beta) \, \widehat e'(\widehat \beta))$ is consistent for $c=\mathbb{E}_{\cal C} e_{it}^2$.
As already noted in the main text we have $\widehat c = L_{NT}\left( \widehat \beta \right)$, and using our expansion and the $\sqrt{NT}$-consistency of $\widehat \beta$ we immediately obtain \begin{align*} \widehat c &= \frac 1 {NT} \, {\rm Tr} (M_{\lambda^0} e M_{f^0} e') + o_p(1) \; . \end{align*} Alternatively, one could use the expansion of $\widehat e$ in Theorem~\ref{theorem:expansions} to show this. From the above result we find \begin{align*} \left| \widehat c - \frac 1 {NT} {\rm Tr} (ee') \right| &= \frac 1 {NT} \left| {\rm Tr} (P_{\lambda^0} e M_{f^0} e') + {\rm Tr} (e P_{f^0} e') \right| + o_p(1) \nonumber \\ &\leq \frac {2R} {NT} \, \|e\|^2 + o_p(1) = o_p(1) \; . \end{align*} By the weak law of large numbers we thus have \begin{align*} \widehat c = \frac 1 {NT} \sum_{i=1}^N \sum_{t=1}^T e_{it}^2 + o_p(1) = c + o_p(1) \; , \end{align*} i.e., $\widehat c$ is indeed consistent for $c$. Given this, one immediately obtains the result for the limiting distribution of $LR_{NT}^*$. For the LM test we first want to show that equation \eqref{EquivGrads} holds. Using the expansion of $\widehat e$ in Theorem~\ref{theorem:expansions} one obtains \begin{align*} \sqrt{NT} (\widetilde \nabla {\cal L}_{NT})_k \, &= \, - \, \frac 2 {\sqrt{NT}} \, {\rm Tr}\left(X'_k \widetilde e\right) \nonumber \\ &= \left[ 2 \, \sqrt{NT} \, W_{NT} \, \left( \widetilde \beta - \beta^0\right) \right]_k + \frac 2 {NT} C^{(1)}(\lambda^0,f^0,X_k,e) + \frac 2 {NT} C^{(2)}(\lambda^0,f^0,X_k,e) \nonumber \\ & \qquad \qquad \qquad \qquad \qquad \qquad \qquad \qquad \qquad \qquad \qquad \qquad \, - \, \frac 2 {\sqrt{NT}} \, {\rm Tr}\left(X'_k \widetilde e^{(\rm rem)}\right) \nonumber \\ &= \left[ 2 \,\sqrt{NT} \, W_{NT} \, \left( \widetilde \beta - \beta^0\right) + \frac 2 {NT} C_{NT} \right]_k + o_p(1) \nonumber \\ &= \sqrt{NT} \left[ \nabla L_{NT}(\widetilde \beta) \right]_k + o_p(1) \, , \end{align*} which is what we wanted to show. Here we used $|{\rm Tr}\left(X'_k \widetilde e^{(\rm rem)}\right)| \leq 7 R \|X_k\| \|\widetilde e^{(\rm rem)}\| = {\cal O}_p(N^{3/2})$. Note that $\|X_k\|={\cal O}_p(N)$, and that Theorem~\ref{theorem:expansions} and the $\sqrt{NT}$-consistency of $\widetilde \beta$ together imply $\|\widetilde e^{(\rm rem)}\| = {\cal O}_p(\sqrt{N})$. We also used the expression for $\nabla L_{NT}(\widetilde \beta)$ given in Theorem~\ref{th:gradient}, and the bound on $\nabla R_{NT}(\beta)$ given there. We now use equation \eqref{limNablaL} and $\widetilde W=W+o_p(1)$, $\widetilde \Omega=\Omega+o_p(1)$, and $\widetilde B=B+o_p(1)$ to obtain \begin{align*} LM^*_{NT} \; \; \limfunc{\longrightarrow}_d \; \; (C - B)' W^{-1} H' (H W^{-1} \Omega W^{-1} H')^{-1} H W^{-1} (C - B) \; . \end{align*} Under $H_0$ we thus find $LM^*_{NT} \; \limfunc{\rightarrow}_d \; \chi^2_r$. \end{proof}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
\section{Additional Monte Carlo Results}
%\label{sec:AddMC}
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
We consider an ${\rm AR}(1)$ model with $R$ factors \begin{align*} Y_{it} \, &= \, \rho^0 \, Y_{i,t-1} \, + \, \sum_{r=1}^R \lambda^0_{ir} \, f^0_{tr} \, + \, e_{it} \; . \end{align*} We draw the errors $e_{it}$ independently and identically distributed from a Student's $t$-distribution with five degrees of freedom.
The $\lambda^0_{ir}$ are independently distributed as ${\cal N}(1,1)$, and we generate the factors from an ${\rm AR}(1)$ specification, namely $f^0_{tr}=\rho_f \, f^0_{t-1,r} + u_{tr}$ for each $r=1,\ldots,R$, where $u_{tr} \sim {\rm iid} \; {\cal N}(0,(1-\rho_f^2)\sigma_f^2)$. For all simulations we generate 1,000 initial time periods for $f^0_t$ and $Y_{it}$ that are not used for estimation. This guarantees that the simulated data used for estimation are distributed according to the stationary distribution of the model. For $R=1$ this is exactly the simulation design used in the Monte Carlo section of the main text, but DGPs with $R>1$ were not considered there. Table~\ref{tab:extra1} reports results for which $R=1$ is used both in the DGP and for the LS estimation. Table~\ref{tab:extra2} reports results for which $R=1$ is used in the DGP, but $R=2$ is used for the LS estimation. Table~\ref{tab:extra3} reports results for which $R=2$ is used both in the DGP and for the LS estimation. The results in Tables~\ref{tab:extra1} and~\ref{tab:extra2} are identical to those reported in Tables~\ref{tab:T1} and~\ref{tab:T2} of the main text, except that we also report results for the CCE estimator. The results in Table~\ref{tab:extra3} are not contained in the main text. The CCE estimator is obtained by using $\widehat f^{\rm proxy}_t = N^{-1} \sum_i ( Y_{it}, \, Y_{i,t-1} )'$ as a proxy for the factors and then estimating the parameters $\rho$, $\lambda_{i1}$, $\lambda_{i2}$, $i=1,\ldots,N$, via OLS in the linear regression model $ Y_{it} = \rho Y_{i,t-1} + \lambda_{i1} \widehat f^{\rm proxy}_{t1} + \lambda_{i2} \widehat f^{\rm proxy}_{t2} + e_{it} $; a minimal simulation sketch of this design and of the CCE regression is given below. The performance of the CCE estimator in Tables~\ref{tab:extra1} and~\ref{tab:extra2} is identical (up to random MC noise), because the number of factors need not be specified for the CCE estimator, and the DGPs underlying the two tables are identical. These tables show that, for $R=1$ in the DGP, the CCE estimator performs very well. From Chudik and Pesaran~\cite*{ChudikPesaran2015} we expect the CCE estimator to have a bias of order $1/T$ in a dynamic model, and this is confirmed in the simulations: the bias of the CCE estimator shrinks roughly in inverse proportion to $T$ as $T$ becomes larger. The $1/T$ bias of the CCE estimator could be corrected for, and we would expect the bias-corrected CCE estimator to perform similarly to the bias-corrected LS estimator. However, if there are $R=2$ factors in the true DGP, then it turns out that the proxies $\widehat f^{\rm proxy}_t$ do not pick up both factors correctly. Table~\ref{tab:extra3} shows that for some parameter values and sample sizes (e.g., $\rho^0=0.3$ and $T=10$, or $\rho^0 = 0.9$ and $T=40$) the CCE estimator is almost unbiased, but for other values, including $T=80$, the CCE estimator is heavily biased if $R=2$. In particular, the bias of the CCE estimator does not seem to converge to zero as $T$ becomes large in this case. By contrast, the correctly specified LS estimator (i.e., using the correct number $R=2$ of factors in the estimation) performs very well according to Table~\ref{tab:extra3}. However, an incorrectly specified LS estimator that underestimates the number of factors (e.g., using $R=1$ factor in the estimation instead of the correct number $R=2$) would probably perform similarly to the CCE estimator, because not all factors would be accounted for.
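To make the simulation design and the pooled CCE regression described above concrete, the following minimal sketch implements both in Python with numpy. The function names \texttt{simulate\_panel} and \texttt{cce\_rho} and the default argument values for $\rho_f$ and $\sigma_f$ are ours and serve illustration only; this is not the code used to produce the tables.
\begin{verbatim}
import numpy as np

def simulate_panel(N, T, rho0, R=1, rho_f=0.5, sigma_f=1.0,
                   burn=1000, seed=None):
    # DGP: Y_it = rho0*Y_{i,t-1} + sum_r lambda_ir*f_tr + e_it, with
    # e_it ~ iid t(5), lambda_ir ~ iid N(1,1), and stationary factors
    # f_tr = rho_f*f_{t-1,r} + u_tr, u_tr ~ iid N(0,(1-rho_f^2)*sigma_f^2).
    rng = np.random.default_rng(seed)
    lam = rng.normal(1.0, 1.0, size=(N, R))
    u = rng.normal(0.0, np.sqrt(1.0 - rho_f**2) * sigma_f,
                   size=(burn + T, R))
    e = rng.standard_t(5, size=(N, burn + T))
    f = np.zeros((burn + T, R))
    Y = np.zeros((N, burn + T))
    for t in range(1, burn + T):
        f[t] = rho_f * f[t - 1] + u[t]
        Y[:, t] = rho0 * Y[:, t - 1] + lam @ f[t] + e[:, t]
    return Y[:, burn:]    # discard the 1,000 burn-in periods

def cce_rho(Y):
    # Pooled CCE: common rho, unit-specific coefficients on the proxies
    # (cross-sectional means of Y_t and Y_{t-1}); because the proxies are
    # the same regressors for every unit, pooled OLS is equivalent to
    # partialling them out with the annihilator M and regressing residuals.
    y, ylag = Y[:, 1:], Y[:, :-1]
    F = np.column_stack([y.mean(axis=0), ylag.mean(axis=0)])
    M = np.eye(F.shape[0]) - F @ np.linalg.pinv(F)
    num = np.einsum('it,ts,is->', ylag, M, y)
    den = np.einsum('it,ts,is->', ylag, M, ylag)
    return num / den
\end{verbatim}
Averaging \texttt{cce\_rho(simulate\_panel(N, T, rho0))} over many Monte Carlo draws should reproduce the qualitative bias pattern discussed above (for the two-factor design one passes \texttt{R=2}); the tables themselves use the parametrization of the main text design.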
Overestimating the number of factors (i.e., using $R=3$ factors in the estimation instead of the correct number $R=2$) should, however, not pose a problem for the LS estimator, according to Moon and Weidner~\cite*{MoonWeidner2015}.
\theendnotes
\begin{thebibliography}{}
\bibitem[\protect\astroncite{Bai}{2009}]{Bai2009} Bai, J. (2009). \newblock Panel data models with interactive fixed effects. \newblock {\em Econometrica}, 77(4):1229--1279.
\bibitem[\protect\astroncite{Bhatia}{1997}]{Bhatia97} Bhatia, R. (1997). \newblock {\em Matrix {A}nalysis}. \newblock Springer-Verlag, New York.
\bibitem[\protect\astroncite{Chudik and Pesaran}{2015}]{ChudikPesaran2015} Chudik, A. and Pesaran, M.~H. (2015). \newblock Common correlated effects estimation of heterogeneous dynamic panel data models with weakly exogenous regressors. \newblock {\em Journal of Econometrics}, 188(2):393--420.
\bibitem[\protect\astroncite{Galantai and Hegedus}{2006}]{GalantaiHegedus2006} Galantai, A. and Hegedus, C.~J. (2006). \newblock {Jordan's principal angles in complex vector spaces}. \newblock {\em Numer. Linear Algebra Appl.}, 13:589--598.
\bibitem[\protect\astroncite{Golub and Van~Loan}{1996}]{golubvanloan1996} Golub, G.~H. and Van~Loan, C.~F. (1996). \newblock {\em Matrix Computations}, Third Edition. \newblock The Johns Hopkins University Press, Baltimore.
\bibitem[\protect\astroncite{Hall and Heyde}{1980}]{HallHeyde1980} Hall, P. and Heyde, C. (1980). \newblock {\em Martingale Limit Theory and its Application}. \newblock Academic Press, New York.
\bibitem[\protect\astroncite{Latala}{2005}]{Latala2006} Latala, R. (2005). \newblock Some estimates of norms of random matrices. \newblock {\em Proc. Amer. Math. Soc.}, 133:1273--1282.
\bibitem[\protect\astroncite{McLeish}{1974}]{Mcleish1974} McLeish, D. (1974). \newblock Dependent central limit theorems and invariance principles. \newblock {\em The Annals of Probability}, 2(4):620--628.
\bibitem[\protect\astroncite{Moon and Weidner}{2015}]{MoonWeidner2015} Moon, H.~R. and Weidner, M. (2015). \newblock Linear regression for panel with unknown number of factors as interactive fixed effects. \newblock {\em Econometrica}, 83(4):1543--1579.
\bibitem[\protect\astroncite{Pesaran}{2006}]{Pesaran2006} Pesaran, M.~H. (2006). \newblock Estimation and inference in large heterogeneous panels with a multifactor error structure. \newblock {\em Econometrica}, 74(4):967--1012.
\bibitem[\protect\astroncite{White}{2001}]{White2001} White, H. (2001). \newblock {\em Asymptotic Theory for Econometricians}. \newblock Academic Press, New York.
\end{thebibliography} \newpage \section*{Tables} %%%%%%%%% % DATA FOR TABLES: %%%%%%%%% %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%% TABLE DATA: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \newcommand{\bOLSEbiasAA}{0.1232} \newcommand{\bOLSEstdeAA}{0.1444} \newcommand{\bOLSErmseAA}{0.1898} \newcommand{\bQMLEbiasAA}{-0.1419} \newcommand{\bQMLEstdeAA}{0.1480} \newcommand{\bQMLErmseAA}{0.2050} \newcommand{\bBCQMbiasAA}{-0.0713} \newcommand{\bBCQMstdeAA}{0.0982} \newcommand{\bBCQMrmseAA}{0.1213} \newcommand{\bCCEPbiasAA}{-0.1755} \newcommand{\bCCEPstdeAA}{0.1681} \newcommand{\bCCEPrmseAA}{0.2430} \newcommand{\bOLSEbiasAB}{0.0200} \newcommand{\bOLSEstdeAB}{0.0723} \newcommand{\bOLSErmseAB}{0.0750} \newcommand{\bQMLEbiasAB}{-0.3686} \newcommand{\bQMLEstdeAB}{0.1718} \newcommand{\bQMLErmseAB}{0.4067} \newcommand{\bBCQMbiasAB}{-0.2330} \newcommand{\bBCQMstdeAB}{0.1301} \newcommand{\bBCQMrmseAB}{0.2669} \newcommand{\bCCEPbiasAB}{-0.3298} \newcommand{\bCCEPstdeAB}{0.2203} \newcommand{\bCCEPrmseAB}{0.3966} \newcommand{\bOLSEbiasAC}{0.1339} \newcommand{\bOLSEstdeAC}{0.1148} \newcommand{\bOLSErmseAC}{0.1764} \newcommand{\bQMLEbiasAC}{-0.0542} \newcommand{\bQMLEstdeAC}{0.0596} \newcommand{\bQMLErmseAC}{0.0806} \newcommand{\bBCQMbiasAC}{-0.0201} \newcommand{\bBCQMstdeAC}{0.0423} \newcommand{\bBCQMrmseAC}{0.0469} \newcommand{\bCCEPbiasAC}{-0.0819} \newcommand{\bCCEPstdeAC}{0.0593} \newcommand{\bCCEPrmseAC}{0.1011} \newcommand{\bOLSEbiasAD}{0.0218} \newcommand{\bOLSEstdeAD}{0.0513} \newcommand{\bOLSErmseAD}{0.0557} \newcommand{\bQMLEbiasAD}{-0.1019} \newcommand{\bQMLEstdeAD}{0.1094} \newcommand{\bQMLErmseAD}{0.1495} \newcommand{\bBCQMbiasAD}{-0.0623} \newcommand{\bBCQMstdeAD}{0.0747} \newcommand{\bBCQMrmseAD}{0.0973} \newcommand{\bCCEPbiasAD}{-0.1436} \newcommand{\bCCEPstdeAD}{0.0972} \newcommand{\bCCEPrmseAD}{0.1734} \newcommand{\bOLSEbiasAE}{0.1441} \newcommand{\bOLSEstdeAE}{0.0879} \newcommand{\bOLSErmseAE}{0.1687} \newcommand{\bQMLEbiasAE}{-0.0264} \newcommand{\bQMLEstdeAE}{0.0284} \newcommand{\bQMLErmseAE}{0.0388} \newcommand{\bBCQMbiasAE}{-0.0070} \newcommand{\bBCQMstdeAE}{0.0240} \newcommand{\bBCQMrmseAE}{0.0250} \newcommand{\bCCEPbiasAE}{-0.0405} \newcommand{\bCCEPstdeAE}{0.0277} \newcommand{\bCCEPrmseAE}{0.0491} \newcommand{\bOLSEbiasAF}{0.0254} \newcommand{\bOLSEstdeAF}{0.0353} \newcommand{\bOLSErmseAF}{0.0434} \newcommand{\bQMLEbiasAF}{-0.0173} \newcommand{\bQMLEstdeAF}{0.0299} \newcommand{\bQMLErmseAF}{0.0345} \newcommand{\bBCQMbiasAF}{-0.0085} \newcommand{\bBCQMstdeAF}{0.0219} \newcommand{\bBCQMrmseAF}{0.0235} \newcommand{\bCCEPbiasAF}{-0.0617} \newcommand{\bCCEPstdeAF}{0.0406} \newcommand{\bCCEPrmseAF}{0.0739} \newcommand{\bOLSEbiasAG}{0.1517} \newcommand{\bOLSEstdeAG}{0.0657} \newcommand{\bOLSErmseAG}{0.1654} \newcommand{\bQMLEbiasAG}{-0.0130} \newcommand{\bQMLEstdeAG}{0.0170} \newcommand{\bQMLErmseAG}{0.0214} \newcommand{\bBCQMbiasAG}{-0.0021} \newcommand{\bBCQMstdeAG}{0.0160} \newcommand{\bBCQMrmseAG}{0.0161} \newcommand{\bCCEPbiasAG}{-0.0200} \newcommand{\bCCEPstdeAG}{0.0166} \newcommand{\bCCEPrmseAG}{0.0260} \newcommand{\bOLSEbiasAH}{0.0294} \newcommand{\bOLSEstdeAH}{0.0250} \newcommand{\bOLSErmseAH}{0.0386} \newcommand{\bQMLEbiasAH}{-0.0057} \newcommand{\bQMLEstdeAH}{0.0105} \newcommand{\bQMLErmseAH}{0.0119} \newcommand{\bBCQMbiasAH}{-0.0019} \newcommand{\bBCQMstdeAH}{0.0089} \newcommand{\bBCQMrmseAH}{0.0091} \newcommand{\bCCEPbiasAH}{-0.0281} \newcommand{\bCCEPstdeAH}{0.0162} \newcommand{\bCCEPrmseAH}{0.0324} \newcommand{\bOLSEbiasAI}{0.1552} \newcommand{\bOLSEstdeAI}{0.0487} 
\newcommand{\bOLSErmseAI}{0.1627} \newcommand{\bQMLEbiasAI}{-0.0066} \newcommand{\bQMLEstdeAI}{0.0112} \newcommand{\bQMLErmseAI}{0.0130} \newcommand{\bBCQMbiasAI}{-0.0007} \newcommand{\bBCQMstdeAI}{0.0109} \newcommand{\bBCQMrmseAI}{0.0109} \newcommand{\bCCEPbiasAI}{-0.0100} \newcommand{\bCCEPstdeAI}{0.0111} \newcommand{\bCCEPrmseAI}{0.0149} \newcommand{\bOLSEbiasAJ}{0.0326} \newcommand{\bOLSEstdeAJ}{0.0179} \newcommand{\bOLSErmseAJ}{0.0372} \newcommand{\bQMLEbiasAJ}{-0.0026} \newcommand{\bQMLEstdeAJ}{0.0056} \newcommand{\bQMLErmseAJ}{0.0062} \newcommand{\bBCQMbiasAJ}{-0.0006} \newcommand{\bBCQMstdeAJ}{0.0053} \newcommand{\bBCQMrmseAJ}{0.0053} \newcommand{\bCCEPbiasAJ}{-0.0136} \newcommand{\bCCEPstdeAJ}{0.0073} \newcommand{\bCCEPrmseAJ}{0.0154} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%% TABLE DATA: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \newcommand{\bbOLSEbiasAA}{0.1239} \newcommand{\bbOLSEstdeAA}{0.1454} \newcommand{\bbOLSErmseAA}{0.1910} \newcommand{\bbQMLEbiasAA}{-0.5467} \newcommand{\bbQMLEstdeAA}{0.1528} \newcommand{\bbQMLErmseAA}{0.5676} \newcommand{\bbBCQMbiasAA}{-0.3721} \newcommand{\bbBCQMstdeAA}{0.1299} \newcommand{\bbBCQMrmseAA}{0.3942} \newcommand{\bbCCEPbiasAA}{-0.1767} \newcommand{\bbCCEPstdeAA}{0.1678} \newcommand{\bbCCEPrmseAA}{0.2437} \newcommand{\bbOLSEbiasAB}{0.0218} \newcommand{\bbOLSEstdeAB}{0.0731} \newcommand{\bbOLSErmseAB}{0.0763} \newcommand{\bbQMLEbiasAB}{-0.9716} \newcommand{\bbQMLEstdeAB}{0.1216} \newcommand{\bbQMLErmseAB}{0.9792} \newcommand{\bbBCQMbiasAB}{-0.7490} \newcommand{\bbBCQMstdeAB}{0.1341} \newcommand{\bbBCQMrmseAB}{0.7609} \newcommand{\bbCCEPbiasAB}{-0.3289} \newcommand{\bbCCEPstdeAB}{0.2203} \newcommand{\bbCCEPrmseAB}{0.3958} \newcommand{\bbOLSEbiasAC}{0.1343} \newcommand{\bbOLSEstdeAC}{0.1145} \newcommand{\bbOLSErmseAC}{0.1765} \newcommand{\bbQMLEbiasAC}{-0.1874} \newcommand{\bbQMLEstdeAC}{0.1159} \newcommand{\bbQMLErmseAC}{0.2203} \newcommand{\bbBCQMbiasAC}{-0.1001} \newcommand{\bbBCQMstdeAC}{0.0758} \newcommand{\bbBCQMrmseAC}{0.1256} \newcommand{\bbCCEPbiasAC}{-0.0816} \newcommand{\bbCCEPstdeAC}{0.0592} \newcommand{\bbCCEPrmseAC}{0.1008} \newcommand{\bbOLSEbiasAD}{0.0210} \newcommand{\bbOLSEstdeAD}{0.0518} \newcommand{\bbOLSErmseAD}{0.0559} \newcommand{\bbQMLEbiasAD}{-0.4923} \newcommand{\bbQMLEstdeAD}{0.1159} \newcommand{\bbQMLErmseAD}{0.5058} \newcommand{\bbBCQMbiasAD}{-0.3271} \newcommand{\bbBCQMstdeAD}{0.0970} \newcommand{\bbBCQMrmseAD}{0.3412} \newcommand{\bbCCEPbiasAD}{-0.1414} \newcommand{\bbCCEPstdeAD}{0.0971} \newcommand{\bbCCEPrmseAD}{0.1715} \newcommand{\bbOLSEbiasAE}{0.1451} \newcommand{\bbOLSEstdeAE}{0.0879} \newcommand{\bbOLSErmseAE}{0.1696} \newcommand{\bbQMLEbiasAE}{-0.0448} \newcommand{\bbQMLEstdeAE}{0.0469} \newcommand{\bbQMLErmseAE}{0.0648} \newcommand{\bbBCQMbiasAE}{-0.0168} \newcommand{\bbBCQMstdeAE}{0.0320} \newcommand{\bbBCQMrmseAE}{0.0362} \newcommand{\bbCCEPbiasAE}{-0.0407} \newcommand{\bbCCEPstdeAE}{0.0277} \newcommand{\bbCCEPrmseAE}{0.0492} \newcommand{\bbOLSEbiasAF}{0.0255} \newcommand{\bbOLSEstdeAF}{0.0354} \newcommand{\bbOLSErmseAF}{0.0436} \newcommand{\bbQMLEbiasAF}{-0.1822} \newcommand{\bbQMLEstdeAF}{0.0820} \newcommand{\bbQMLErmseAF}{0.1999} \newcommand{\bbBCQMbiasAF}{-0.1085} \newcommand{\bbBCQMstdeAF}{0.0528} \newcommand{\bbBCQMrmseAF}{0.1207} \newcommand{\bbCCEPbiasAF}{-0.0618} \newcommand{\bbCCEPstdeAF}{0.0404} \newcommand{\bbCCEPrmseAF}{0.0739} \newcommand{\bbOLSEbiasAG}{0.1511} \newcommand{\bbOLSEstdeAG}{0.0663} \newcommand{\bbOLSErmseAG}{0.1650} \newcommand{\bbQMLEbiasAG}{-0.0161} 
\newcommand{\bbQMLEstdeAG}{0.0209} \newcommand{\bbQMLErmseAG}{0.0264} \newcommand{\bbBCQMbiasAG}{-0.0038} \newcommand{\bbBCQMstdeAG}{0.0177} \newcommand{\bbBCQMrmseAG}{0.0181} \newcommand{\bbCCEPbiasAG}{-0.0199} \newcommand{\bbCCEPstdeAG}{0.0167} \newcommand{\bbCCEPrmseAG}{0.0260} \newcommand{\bbOLSEbiasAH}{0.0300} \newcommand{\bbOLSEstdeAH}{0.0250} \newcommand{\bbOLSErmseAH}{0.0390} \newcommand{\bbQMLEbiasAH}{-0.0227} \newcommand{\bbQMLEstdeAH}{0.0342} \newcommand{\bbQMLErmseAH}{0.0410} \newcommand{\bbBCQMbiasAH}{-0.0128} \newcommand{\bbBCQMstdeAH}{0.0225} \newcommand{\bbBCQMrmseAH}{0.0258} \newcommand{\bbCCEPbiasAH}{-0.0282} \newcommand{\bbCCEPstdeAH}{0.0164} \newcommand{\bbCCEPrmseAH}{0.0326} \newcommand{\bbOLSEbiasAI}{0.1550} \newcommand{\bbOLSEstdeAI}{0.0488} \newcommand{\bbOLSErmseAI}{0.1625} \newcommand{\bbQMLEbiasAI}{-0.0072} \newcommand{\bbQMLEstdeAI}{0.0123} \newcommand{\bbQMLErmseAI}{0.0143} \newcommand{\bbBCQMbiasAI}{-0.0011} \newcommand{\bbBCQMstdeAI}{0.0115} \newcommand{\bbBCQMrmseAI}{0.0116} \newcommand{\bbCCEPbiasAI}{-0.0100} \newcommand{\bbCCEPstdeAI}{0.0111} \newcommand{\bbCCEPrmseAI}{0.0149} \newcommand{\bbOLSEbiasAJ}{0.0325} \newcommand{\bbOLSEstdeAJ}{0.0182} \newcommand{\bbOLSErmseAJ}{0.0372} \newcommand{\bbQMLEbiasAJ}{-0.0030} \newcommand{\bbQMLEstdeAJ}{0.0064} \newcommand{\bbQMLErmseAJ}{0.0071} \newcommand{\bbBCQMbiasAJ}{-0.0010} \newcommand{\bbBCQMstdeAJ}{0.0057} \newcommand{\bbBCQMrmseAJ}{0.0058} \newcommand{\bbCCEPbiasAJ}{-0.0136} \newcommand{\bbCCEPstdeAJ}{0.0074} \newcommand{\bbCCEPrmseAJ}{0.0155} %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% %%%%% TABLE EXTRA DATA: %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% \newcommand{\bbbbOLSEbiasAA}{0.1861} \newcommand{\bbbbOLSEstdeAA}{0.1562} \newcommand{\bbbbOLSErmseAA}{0.2429} \newcommand{\bbbbQMLEbiasAA}{-0.4968} \newcommand{\bbbbQMLEstdeAA}{0.1910} \newcommand{\bbbbQMLErmseAA}{0.5322} \newcommand{\bbbbBCQMbiasAA}{-0.3323} \newcommand{\bbbbBCQMstdeAA}{0.1580} \newcommand{\bbbbBCQMrmseAA}{0.3680} \newcommand{\bbbbCCEPbiasAA}{-0.1002} \newcommand{\bbbbCCEPstdeAA}{0.2063} \newcommand{\bbbbCCEPrmseAA}{0.2294} \newcommand{\bbbbOLSEbiasAB}{0.0309} \newcommand{\bbbbOLSEstdeAB}{0.0801} \newcommand{\bbbbOLSErmseAB}{0.0859} \newcommand{\bbbbQMLEbiasAB}{-0.9305} \newcommand{\bbbbQMLEstdeAB}{0.1644} \newcommand{\bbbbQMLErmseAB}{0.9449} \newcommand{\bbbbBCQMbiasAB}{-0.7057} \newcommand{\bbbbBCQMstdeAB}{0.1754} \newcommand{\bbbbBCQMrmseAB}{0.7272} \newcommand{\bbbbCCEPbiasAB}{-0.2750} \newcommand{\bbbbCCEPstdeAB}{0.2302} \newcommand{\bbbbCCEPrmseAB}{0.3586} \newcommand{\bbbbOLSEbiasAC}{0.1989} \newcommand{\bbbbOLSEstdeAC}{0.1185} \newcommand{\bbbbOLSErmseAC}{0.2315} \newcommand{\bbbbQMLEbiasAC}{-0.1569} \newcommand{\bbbbQMLEstdeAC}{0.1018} \newcommand{\bbbbQMLErmseAC}{0.1870} \newcommand{\bbbbBCQMbiasAC}{-0.0758} \newcommand{\bbbbBCQMstdeAC}{0.0700} \newcommand{\bbbbBCQMrmseAC}{0.1031} \newcommand{\bbbbCCEPbiasAC}{0.0036} \newcommand{\bbbbCCEPstdeAC}{0.1074} \newcommand{\bbbbCCEPrmseAC}{0.1074} \newcommand{\bbbbOLSEbiasAD}{0.0326} \newcommand{\bbbbOLSEstdeAD}{0.0543} \newcommand{\bbbbOLSErmseAD}{0.0633} \newcommand{\bbbbQMLEbiasAD}{-0.4209} \newcommand{\bbbbQMLEstdeAD}{0.1607} \newcommand{\bbbbQMLErmseAD}{0.4505} \newcommand{\bbbbBCQMbiasAD}{-0.2732} \newcommand{\bbbbBCQMstdeAD}{0.1235} \newcommand{\bbbbBCQMrmseAD}{0.2998} \newcommand{\bbbbCCEPbiasAD}{-0.1040} \newcommand{\bbbbCCEPstdeAD}{0.1070} \newcommand{\bbbbCCEPrmseAD}{0.1492} \newcommand{\bbbbOLSEbiasAE}{0.2096} \newcommand{\bbbbOLSEstdeAE}{0.0884} \newcommand{\bbbbOLSErmseAE}{0.2274} 
\newcommand{\bbbbQMLEbiasAE}{-0.0592} \newcommand{\bbbbQMLEstdeAE}{0.0377} \newcommand{\bbbbQMLErmseAE}{0.0702} \newcommand{\bbbbBCQMbiasAE}{-0.0185} \newcommand{\bbbbBCQMstdeAE}{0.0287} \newcommand{\bbbbBCQMrmseAE}{0.0341} \newcommand{\bbbbCCEPbiasAE}{0.0520} \newcommand{\bbbbCCEPstdeAE}{0.0711} \newcommand{\bbbbCCEPrmseAE}{0.0881} \newcommand{\bbbbOLSEbiasAF}{0.0366} \newcommand{\bbbbOLSEstdeAF}{0.0356} \newcommand{\bbbbOLSErmseAF}{0.0511} \newcommand{\bbbbQMLEbiasAF}{-0.0741} \newcommand{\bbbbQMLEstdeAF}{0.0859} \newcommand{\bbbbQMLErmseAF}{0.1134} \newcommand{\bbbbBCQMbiasAF}{-0.0406} \newcommand{\bbbbBCQMstdeAF}{0.0552} \newcommand{\bbbbBCQMrmseAF}{0.0686} \newcommand{\bbbbCCEPbiasAF}{-0.0310} \newcommand{\bbbbCCEPstdeAF}{0.0512} \newcommand{\bbbbCCEPrmseAF}{0.0599} \newcommand{\bbbbOLSEbiasAG}{0.2174} \newcommand{\bbbbOLSEstdeAG}{0.0649} \newcommand{\bbbbOLSErmseAG}{0.2269} \newcommand{\bbbbQMLEbiasAG}{-0.0275} \newcommand{\bbbbQMLEstdeAG}{0.0192} \newcommand{\bbbbQMLErmseAG}{0.0335} \newcommand{\bbbbBCQMbiasAG}{-0.0054} \newcommand{\bbbbBCQMstdeAG}{0.0170} \newcommand{\bbbbBCQMrmseAG}{0.0179} \newcommand{\bbbbCCEPbiasAG}{0.0759} \newcommand{\bbbbCCEPstdeAG}{0.0500} \newcommand{\bbbbCCEPrmseAG}{0.0908} \newcommand{\bbbbOLSEbiasAH}{0.0404} \newcommand{\bbbbOLSEstdeAH}{0.0239} \newcommand{\bbbbOLSErmseAH}{0.0469} \newcommand{\bbbbQMLEbiasAH}{-0.0134} \newcommand{\bbbbQMLEstdeAH}{0.0166} \newcommand{\bbbbQMLErmseAH}{0.0214} \newcommand{\bbbbBCQMbiasAH}{-0.0047} \newcommand{\bbbbBCQMstdeAH}{0.0122} \newcommand{\bbbbBCQMrmseAH}{0.0131} \newcommand{\bbbbCCEPbiasAH}{-0.0012} \newcommand{\bbbbCCEPstdeAH}{0.0281} \newcommand{\bbbbCCEPrmseAH}{0.0281} \newcommand{\bbbbOLSEbiasAI}{0.2232} \newcommand{\bbbbOLSEstdeAI}{0.0472} \newcommand{\bbbbOLSErmseAI}{0.2281} \newcommand{\bbbbQMLEbiasAI}{-0.0134} \newcommand{\bbbbQMLEstdeAI}{0.0118} \newcommand{\bbbbQMLErmseAI}{0.0179} \newcommand{\bbbbBCQMbiasAI}{-0.0016} \newcommand{\bbbbBCQMstdeAI}{0.0113} \newcommand{\bbbbBCQMrmseAI}{0.0114} \newcommand{\bbbbCCEPbiasAI}{0.0873} \newcommand{\bbbbCCEPstdeAI}{0.0364} \newcommand{\bbbbCCEPrmseAI}{0.0946} \newcommand{\bbbbOLSEbiasAJ}{0.0433} \newcommand{\bbbbOLSEstdeAJ}{0.0164} \newcommand{\bbbbOLSErmseAJ}{0.0463} \newcommand{\bbbbQMLEbiasAJ}{-0.0052} \newcommand{\bbbbQMLEstdeAJ}{0.0066} \newcommand{\bbbbQMLErmseAJ}{0.0084} \newcommand{\bbbbBCQMbiasAJ}{-0.0012} \newcommand{\bbbbBCQMstdeAJ}{0.0058} \newcommand{\bbbbBCQMrmseAJ}{0.0059} \newcommand{\bbbbCCEPbiasAJ}{0.0125} \newcommand{\bbbbCCEPstdeAJ}{0.0176} \newcommand{\bbbbCCEPrmseAJ}{0.0216} \begin{table}[H] \begin{center} \caption{\label{tab:extra1} %\footnotesize Same as Table~\ref{tab:T1} in main paper, but also reporting pooled CCE estimator of Pesaran~(2006). 
} \vspace{0.5cm} \begin{tabular}{l@{\;\,}l@{\quad}l@{\;\;}l@{\;\;}l@{\;\;}l@{\quad}l@{\;\;}l@{\;\;}l@{\;\;}l} \hline & & \multicolumn{4}{c}{$\rho^0=0.3$} & \multicolumn{4}{c}{$\rho^0=0.9$} \\[0.1cm] & & OLS & FLS & BC-FLS & CCE & OLS & FLS & BC-FLS & CCE \\ \hline $T=5$ & bias & \bOLSEbiasAA & \bQMLEbiasAA & \bBCQMbiasAA & \bCCEPbiasAA & \bOLSEbiasAB & \bQMLEbiasAB & \bBCQMbiasAB & \bCCEPbiasAB \\ $(M=2)$ & std & \bOLSEstdeAA & \bQMLEstdeAA & \bBCQMstdeAA & \bCCEPstdeAA & \bOLSEstdeAB & \bQMLEstdeAB & \bBCQMstdeAB & \bCCEPstdeAB \\ & rmse & \bOLSErmseAA & \bQMLErmseAA & \bBCQMrmseAA & \bCCEPrmseAA & \bOLSErmseAB & \bQMLErmseAB & \bBCQMrmseAB & \bCCEPrmseAB \\[8pt] $T=10$ & bias & \bOLSEbiasAC & \bQMLEbiasAC & \bBCQMbiasAC & \bCCEPbiasAC & \bOLSEbiasAD & \bQMLEbiasAD & \bBCQMbiasAD & \bCCEPbiasAD \\ $(M=3)$ & std & \bOLSEstdeAC & \bQMLEstdeAC & \bBCQMstdeAC & \bCCEPstdeAC & \bOLSEstdeAD & \bQMLEstdeAD & \bBCQMstdeAD & \bCCEPstdeAD \\ & rmse & \bOLSErmseAC & \bQMLErmseAC & \bBCQMrmseAC & \bCCEPrmseAC & \bOLSErmseAD & \bQMLErmseAD & \bBCQMrmseAD & \bCCEPrmseAD \\[8pt] $T=20$ & bias & \bOLSEbiasAE & \bQMLEbiasAE & \bBCQMbiasAE & \bCCEPbiasAE & \bOLSEbiasAF & \bQMLEbiasAF & \bBCQMbiasAF & \bCCEPbiasAF \\ $(M=4)$ & std & \bOLSEstdeAE & \bQMLEstdeAE & \bBCQMstdeAE & \bCCEPstdeAE & \bOLSEstdeAF & \bQMLEstdeAF & \bBCQMstdeAF & \bCCEPstdeAF \\ & rmse & \bOLSErmseAE & \bQMLErmseAE & \bBCQMrmseAE & \bCCEPrmseAE & \bOLSErmseAF & \bQMLErmseAF & \bBCQMrmseAF & \bCCEPrmseAF \\[8pt] $T=40$ & bias & \bOLSEbiasAG & \bQMLEbiasAG & \bBCQMbiasAG & \bCCEPbiasAG & \bOLSEbiasAH & \bQMLEbiasAH & \bBCQMbiasAH & \bCCEPbiasAH \\ $(M=5)$ & std & \bOLSEstdeAG & \bQMLEstdeAG & \bBCQMstdeAG & \bCCEPstdeAG & \bOLSEstdeAH & \bQMLEstdeAH & \bBCQMstdeAH & \bCCEPstdeAH \\ & rmse & \bOLSErmseAG & \bQMLErmseAG & \bBCQMrmseAG & \bCCEPrmseAG & \bOLSErmseAH & \bQMLErmseAH & \bBCQMrmseAH & \bCCEPrmseAH \\[8pt] $T=80$ & bias & \bOLSEbiasAI & \bQMLEbiasAI & \bBCQMbiasAI & \bCCEPbiasAI & \bOLSEbiasAJ & \bQMLEbiasAJ & \bBCQMbiasAJ & \bCCEPbiasAJ \\ $(M=6)$ & std & \bOLSEstdeAI & \bQMLEstdeAI & \bBCQMstdeAI & \bCCEPstdeAI & \bOLSEstdeAJ & \bQMLEstdeAJ & \bBCQMstdeAJ & \bCCEPstdeAJ \\ & rmse & \bOLSErmseAI & \bQMLErmseAI & \bBCQMrmseAI & \bCCEPrmseAI & \bOLSErmseAJ & \bQMLErmseAJ & \bBCQMrmseAJ & \bCCEPrmseAJ \\ \hline \end{tabular} \end{center} \end{table} \begin{table}[H] \begin{center} \caption{\label{tab:extra2} %\footnotesize Same as Table~\ref{tab:T2} in main paper, but also reporting pooled CCE estimator of Pesaran~(2006). 
} \vspace{0.5cm} \begin{tabular}{l@{\;\,}l@{\quad}l@{\;\;}l@{\;\;}l@{\;\;}l@{\quad}l@{\;\;}l@{\;\;}l@{\;\;}l} \hline & & \multicolumn{4}{c}{$\rho^0=0.3$} & \multicolumn{4}{c}{$\rho^0=0.9$} \\[0.1cm] & & OLS & FLS & BC-FLS & CCE & OLS & FLS & BC-FLS & CCE \\ \hline $T=5$ & bias & \bbOLSEbiasAA & \bbQMLEbiasAA & \bbBCQMbiasAA & \bbCCEPbiasAA & \bbOLSEbiasAB & \bbQMLEbiasAB & \bbBCQMbiasAB & \bbCCEPbiasAB \\ $(M=2)$ & std & \bbOLSEstdeAA & \bbQMLEstdeAA & \bbBCQMstdeAA & \bbCCEPstdeAA & \bbOLSEstdeAB & \bbQMLEstdeAB & \bbBCQMstdeAB & \bbCCEPstdeAB \\ & rmse & \bbOLSErmseAA & \bbQMLErmseAA & \bbBCQMrmseAA & \bbCCEPrmseAA & \bbOLSErmseAB & \bbQMLErmseAB & \bbBCQMrmseAB & \bbCCEPrmseAB \\[8pt] $T=10$ & bias & \bbOLSEbiasAC & \bbQMLEbiasAC & \bbBCQMbiasAC & \bbCCEPbiasAC & \bbOLSEbiasAD & \bbQMLEbiasAD & \bbBCQMbiasAD & \bbCCEPbiasAD \\ $(M=3)$ & std & \bbOLSEstdeAC & \bbQMLEstdeAC & \bbBCQMstdeAC & \bbCCEPstdeAC & \bbOLSEstdeAD & \bbQMLEstdeAD & \bbBCQMstdeAD & \bbCCEPstdeAD \\ & rmse & \bbOLSErmseAC & \bbQMLErmseAC & \bbBCQMrmseAC & \bbCCEPrmseAC & \bbOLSErmseAD & \bbQMLErmseAD & \bbBCQMrmseAD & \bbCCEPrmseAD \\[8pt] $T=20$ & bias & \bbOLSEbiasAE & \bbQMLEbiasAE & \bbBCQMbiasAE & \bbCCEPbiasAE & \bbOLSEbiasAF & \bbQMLEbiasAF & \bbBCQMbiasAF & \bbCCEPbiasAF \\ $(M=4)$ & std & \bbOLSEstdeAE & \bbQMLEstdeAE & \bbBCQMstdeAE & \bbCCEPstdeAE & \bbOLSEstdeAF & \bbQMLEstdeAF & \bbBCQMstdeAF & \bbCCEPstdeAF \\ & rmse & \bbOLSErmseAE & \bbQMLErmseAE & \bbBCQMrmseAE & \bbCCEPrmseAE & \bbOLSErmseAF & \bbQMLErmseAF & \bbBCQMrmseAF & \bbCCEPrmseAF \\[8pt] $T=40$ & bias & \bbOLSEbiasAG & \bbQMLEbiasAG & \bbBCQMbiasAG & \bbCCEPbiasAG & \bbOLSEbiasAH & \bbQMLEbiasAH & \bbBCQMbiasAH & \bbCCEPbiasAH \\ $(M=5)$ & std & \bbOLSEstdeAG & \bbQMLEstdeAG & \bbBCQMstdeAG & \bbCCEPstdeAG & \bbOLSEstdeAH & \bbQMLEstdeAH & \bbBCQMstdeAH & \bbCCEPstdeAH \\ & rmse & \bbOLSErmseAG & \bbQMLErmseAG & \bbBCQMrmseAG & \bbCCEPrmseAG & \bbOLSErmseAH & \bbQMLErmseAH & \bbBCQMrmseAH & \bbCCEPrmseAH \\[8pt] $T=80$ & bias & \bbOLSEbiasAI & \bbQMLEbiasAI & \bbBCQMbiasAI & \bbCCEPbiasAI & \bbOLSEbiasAJ & \bbQMLEbiasAJ & \bbBCQMbiasAJ & \bbCCEPbiasAJ \\ $(M=6)$ & std & \bbOLSEstdeAI & \bbQMLEstdeAI & \bbBCQMstdeAI & \bbCCEPstdeAI & \bbOLSEstdeAJ & \bbQMLEstdeAJ & \bbBCQMstdeAJ & \bbCCEPstdeAJ \\ & rmse & \bbOLSErmseAI & \bbQMLErmseAI & \bbBCQMrmseAI & \bbCCEPrmseAI & \bbOLSErmseAJ & \bbQMLErmseAJ & \bbBCQMrmseAJ & \bbCCEPrmseAJ \\ \hline \end{tabular} \end{center} \end{table} \begin{table}[H] \begin{center} \caption{\label{tab:extra3} %\footnotesize Analogous to Table~\ref{tab:T2} in main paper, but with $R=2$ correctly specified, and also reporting pooled CCE estimator of Pesaran~(2006). 
} \vspace{0.5cm} \begin{tabular}{l@{\;\,}l@{\quad}l@{\;\;}l@{\;\;}l@{\;\;}l@{\quad}l@{\;\;}l@{\;\;}l@{\;\;}l} \hline & & \multicolumn{4}{c}{$\rho^0=0.3$} & \multicolumn{4}{c}{$\rho^0=0.9$} \\[0.1cm] & & OLS & FLS & BC-FLS & CCE & OLS & FLS & BC-FLS & CCE \\ \hline $T=5$ & bias & \bbbbOLSEbiasAA & \bbbbQMLEbiasAA & \bbbbBCQMbiasAA & \bbbbCCEPbiasAA & \bbbbOLSEbiasAB & \bbbbQMLEbiasAB & \bbbbBCQMbiasAB & \bbbbCCEPbiasAB \\ $(M=2)$ & std & \bbbbOLSEstdeAA & \bbbbQMLEstdeAA & \bbbbBCQMstdeAA & \bbbbCCEPstdeAA & \bbbbOLSEstdeAB & \bbbbQMLEstdeAB & \bbbbBCQMstdeAB & \bbbbCCEPstdeAB \\ & rmse & \bbbbOLSErmseAA & \bbbbQMLErmseAA & \bbbbBCQMrmseAA & \bbbbCCEPrmseAA & \bbbbOLSErmseAB & \bbbbQMLErmseAB & \bbbbBCQMrmseAB & \bbbbCCEPrmseAB \\[8pt] $T=10$ & bias & \bbbbOLSEbiasAC & \bbbbQMLEbiasAC & \bbbbBCQMbiasAC & \bbbbCCEPbiasAC & \bbbbOLSEbiasAD & \bbbbQMLEbiasAD & \bbbbBCQMbiasAD & \bbbbCCEPbiasAD \\ $(M=3)$ & std & \bbbbOLSEstdeAC & \bbbbQMLEstdeAC & \bbbbBCQMstdeAC & \bbbbCCEPstdeAC & \bbbbOLSEstdeAD & \bbbbQMLEstdeAD & \bbbbBCQMstdeAD & \bbbbCCEPstdeAD \\ & rmse & \bbbbOLSErmseAC & \bbbbQMLErmseAC & \bbbbBCQMrmseAC & \bbbbCCEPrmseAC & \bbbbOLSErmseAD & \bbbbQMLErmseAD & \bbbbBCQMrmseAD & \bbbbCCEPrmseAD \\[8pt] $T=20$ & bias & \bbbbOLSEbiasAE & \bbbbQMLEbiasAE & \bbbbBCQMbiasAE & \bbbbCCEPbiasAE & \bbbbOLSEbiasAF & \bbbbQMLEbiasAF & \bbbbBCQMbiasAF & \bbbbCCEPbiasAF \\ $(M=4)$ & std & \bbbbOLSEstdeAE & \bbbbQMLEstdeAE & \bbbbBCQMstdeAE & \bbbbCCEPstdeAE & \bbbbOLSEstdeAF & \bbbbQMLEstdeAF & \bbbbBCQMstdeAF & \bbbbCCEPstdeAF \\ & rmse & \bbbbOLSErmseAE & \bbbbQMLErmseAE & \bbbbBCQMrmseAE & \bbbbCCEPrmseAE & \bbbbOLSErmseAF & \bbbbQMLErmseAF & \bbbbBCQMrmseAF & \bbbbCCEPrmseAF \\[8pt] $T=40$ & bias & \bbbbOLSEbiasAG & \bbbbQMLEbiasAG & \bbbbBCQMbiasAG & \bbbbCCEPbiasAG & \bbbbOLSEbiasAH & \bbbbQMLEbiasAH & \bbbbBCQMbiasAH & \bbbbCCEPbiasAH \\ $(M=5)$ & std & \bbbbOLSEstdeAG & \bbbbQMLEstdeAG & \bbbbBCQMstdeAG & \bbbbCCEPstdeAG & \bbbbOLSEstdeAH & \bbbbQMLEstdeAH & \bbbbBCQMstdeAH & \bbbbCCEPstdeAH \\ & rmse & \bbbbOLSErmseAG & \bbbbQMLErmseAG & \bbbbBCQMrmseAG & \bbbbCCEPrmseAG & \bbbbOLSErmseAH & \bbbbQMLErmseAH & \bbbbBCQMrmseAH & \bbbbCCEPrmseAH \\[8pt] $T=80$ & bias & \bbbbOLSEbiasAI & \bbbbQMLEbiasAI & \bbbbBCQMbiasAI & \bbbbCCEPbiasAI & \bbbbOLSEbiasAJ & \bbbbQMLEbiasAJ & \bbbbBCQMbiasAJ & \bbbbCCEPbiasAJ \\ $(M=6)$ & std & \bbbbOLSEstdeAI & \bbbbQMLEstdeAI & \bbbbBCQMstdeAI & \bbbbCCEPstdeAI & \bbbbOLSEstdeAJ & \bbbbQMLEstdeAJ & \bbbbBCQMstdeAJ & \bbbbCCEPstdeAJ \\ & rmse & \bbbbOLSErmseAI & \bbbbQMLErmseAI & \bbbbBCQMrmseAI & \bbbbCCEPrmseAI & \bbbbOLSErmseAJ & \bbbbQMLErmseAJ & \bbbbBCQMrmseAJ & \bbbbCCEPrmseAJ \\ \hline \end{tabular} \end{center} \end{table} \end{document}