\documentclass[11pt]{article}% \usepackage{amssymb} \usepackage{amsmath} \usepackage{amsfonts} \usepackage{geometry} \usepackage[onehalfspacing]{setspace} \usepackage{numinsec} \usepackage{harvard} \usepackage{hyperref} \usepackage{graphicx}% \setcounter{MaxMatrixCols}{30} %TCIDATA{OutputFilter=latex2.dll} %TCIDATA{Version=5.50.0.2890} %TCIDATA{CSTFile=LaTeX article (bright).cst} %TCIDATA{Created=Saturday, July 19, 2008 23:46:52} %TCIDATA{LastRevised=Monday, January 15, 2018 20:37:49} %TCIDATA{} %TCIDATA{} %TCIDATA{BibliographyScheme=BibTeX} %TCIDATA{} %TCIDATA{Language=American English} %BeginMSIPreambleData \providecommand{\U}[1]{\protect\rule{.1in}{.1in}} %EndMSIPreambleData \newtheorem{theorem}{Theorem}[section] \newtheorem{acknowledgement}[theorem]{Acknowledgement} \newtheorem{algorithm}[theorem]{Algorithm} \newtheorem{axiom}[theorem]{Axiom} \newtheorem{case}[theorem]{Case} \newtheorem{claim}[theorem]{Claim} \newtheorem{conclusion}[theorem]{Conclusion} \newtheorem{condition}[theorem]{Condition} \newtheorem{conjecture}[theorem]{Conjecture} \newtheorem{corollary}[theorem]{Corollary} \newtheorem{criterion}[theorem]{Criterion} \newtheorem{definition}[theorem]{Definition} \newtheorem{example}[theorem]{Example} \newtheorem{exercise}[theorem]{Exercise} \newtheorem{lemma}{Lemma}[section] \newtheorem{notation}[theorem]{Notation} \newtheorem{problem}[theorem]{Problem} \newtheorem{proposition}[theorem]{Proposition} \newtheorem{remark}[theorem]{Remark} \newtheorem{solution}[theorem]{Solution} \newtheorem{summary}[theorem]{Summary} \newtheorem{assumption}{Assumption}[section] \newenvironment{proof}[1][Proof]{\noindent \textbf{#1.} }{\ \rule{0.5em}{0.5em}} \renewcommand{\baselinestretch}{1.5} \oddsidemargin -.1in \evensidemargin -.1in \marginparwidth 1in \marginparsep 0pt \topmargin 0pt \headheight 0pt \headsep 0pt \textheight 8.9in \textwidth 6.9in \topskip 0pt \footskip 1cm \begin{document} \title{Supplemental Appendix for \textquotedblleft Nonparametric Two-Step Sieve M 
Estimation and Inference\textquotedblright} \author{Jinyong Hahn\thanks{Department of Economics, UCLA, Los Angeles, CA 90095-1477 USA. Email:\ hahn@econ.ucla.edu}\\UCLA \and Zhipeng Liao\thanks{Department of Economics, UCLA, Los Angeles, CA 90095-1477 USA. Email:\ zhipeng.liao@econ.ucla.edu}\\UCLA \and Geert Ridder\thanks{Department of Economics, University of Southern California, Los Angeles, CA 90089.\ Email:\ ridder@usc.edu.}\\USC} \date{This version: December 2017} \maketitle This supplemental appendix provides some auxiliary materials for \textquotedblleft Nonparametric Two-Step Sieve M Estimation and Inference\textquotedblright\ (cited as HLR in this appendix). Section\ 1 provides sufficient conditions for Assumptions 3.2 and 3.4 in HLR which are the key high-level conditions for asymptotic normality of the two-step sieve M estimator. Section 2 presents some lemmas which are used in proving Theorem 5.1 in HLR. Section 3 contains verification of the high-level assumptions for asymptotic normality in the nonparametric triangular simultaneous equation model. Section 4 contains some extra simulation results.\ Section 5 establishes general theory on the consistency and convergence rate of the nonparametric two-step sieve M estimator. \section{Sufficient Conditions for Assumptions 3.2 and 3.4 in HLR} In this section, we provide sufficient conditions for the high-level assumptions (Assumptions 3.2\ and 3.4) of the asymptotic normality of the nonparametric two-step sieve M estimator. These sufficient conditions are verified in the nonparametric triangular simultaneous equation model in Section 3 of the Appendix.\ We assume that the data $\left\{ Z_{i}\right\} _{i=1}^{n}$ is i.i.d. in this section. \begin{assumption} \label{L-SA-1} (i) For any $z_{2}\in\mathcal{Z}_{2}$, any $\alpha \in\mathcal{N}_{\alpha}$ and any $v_{g,1},v_{g,2}\in\mathcal{V}_{2}$, the following directional derivatives exist% \[ \Delta_{\psi}(z_{2},\alpha)[v_{g,1}]=\left. 
\frac{\partial\psi(z_{2},g+\tau v_{g,1},h)}{\partial\tau}\right\vert _{\tau=0}\text{ and }r_{\psi,g}% (z_{2},\alpha)[v_{g,1},v_{g,2}]=\left. \frac{\partial\Delta_{\psi}% (z_{2},g+\tau v_{g,2},h)[v_{g,1}]}{\partial\tau}\right\vert _{\tau=0}; \] (ii) there exists $\Lambda_{1,n}(z_{2})$ with $\mathbb{E}\left[ \Lambda _{1,n}(Z_{2})\right] \leq C$ such that \[ \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \psi(z_{2},g^{\ast},h)-\psi (z_{2},\alpha)-\Delta_{\psi}(z_{2},\alpha)[\pm\kappa_{n}u_{g_{n}}^{\ast }]-\kappa_{n}^{2}r_{\psi,g}(z_{2},\alpha)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast }]\right\vert \leq\kappa_{n}^{2}\Lambda_{1,n}(z_{2}); \] (iii) there exists $\Lambda_{2,n}(z_{2})$ with $\mathbb{E}\left[ \Lambda_{2,n}(Z_{2})\right] \leq C$ such that% \[ \sup_{\alpha\in\mathcal{N}_{n}}\left\vert r_{\psi,g}(z_{2},\alpha)[u_{g_{n}% }^{\ast},u_{g_{n}}^{\ast}]-r_{\psi,g}(z_{2},\alpha_{o})[u_{g_{n}}^{\ast },u_{g_{n}}^{\ast}]\right\vert \leq\Lambda_{2,n}(z_{2}); \] (iv) $\mathbb{E}\left[ \left\vert r_{\psi,g}(Z_{2},\alpha_{o})[u_{g_{n}% }^{\ast},u_{g_{n}}^{\ast}]\right\vert \right] \leq C$; (v) $\mathbb{E}\left[ r_{\psi,h}(Z_{2},\alpha_{o})[h_{o,n}-h_{o},u_{g_{n}}^{\ast}]\right] =o(n^{-1/2})$. \end{assumption} \begin{assumption} \label{L-SA-2} (i) For any $z_{2}\in\mathcal{Z}_{2}$, any $\alpha \in\mathcal{N}_{\alpha}$, any $v_{h}\in\mathcal{V}_{1}$ and any $v_{g}% \in\mathcal{V}_{2}$, the following directional derivative exists% \[ \left. 
\frac{\partial\Delta_{\psi}(z_{2},g,h+\tau v_{h})[v_{g}]}{\partial \tau}\right\vert _{\tau=0}=r_{\psi,h}(z_{2},\alpha)[v_{h},v_{g}]; \] (ii) there exists $\Lambda_{3,n}(z_{2},\alpha)$ such that for any $\alpha \in\mathcal{N}_{n}$,% \[ \left\vert \Delta_{\psi}(z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}% (z_{2},g_{o},h)[u_{g_{n}}^{\ast}]-r_{\psi,g}(z_{2},g_{o},h)[g-g_{o},u_{g_{n}% }^{\ast}]\right\vert \leq\Lambda_{3,n}(z_{2},\alpha); \] (iii) there exists $\Lambda_{4,n}(z_{2},\alpha)$ such that for any $\alpha \in\mathcal{N}_{n}$,% \[ \left\vert \Delta_{\psi}(z_{2},g_{o},h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}% (z_{2},g_{o},h_{o})[u_{g_{n}}^{\ast}]-r_{\psi,h}(z_{2},g_{o},h_{o}% )[h-h_{o},u_{g_{n}}^{\ast}]\right\vert \leq\Lambda_{4,n}(z_{2},\alpha); \] (iv) there exists $\Lambda_{5,n}(z_{2},\alpha)$ such that for any $\alpha \in\mathcal{N}_{n}$,% \[ \left\vert r_{\psi,g}(z_{2},g_{o},h)[g-g_{o},u_{g_{n}}^{\ast}]-r_{\psi ,g}(z_{2},g_{o},h_{o})[g-g_{o},u_{g_{n}}^{\ast}]\right\vert \leq\Lambda _{5,n}(z_{2},\alpha); \] (v) $\max_{j=3,4,5}\sup_{\alpha\in\mathcal{N}_{n}}n^{-1/2}\sum_{i=1}% ^{n}\Lambda_{j,n}(Z_{2,i},\alpha)=o_{p}(1)$; (vi) $\max_{j=3,4,5}\sup _{\alpha\in\mathcal{N}_{n}}\mathbb{E}\left[ \Lambda_{j,n}(Z_{2}% ,\alpha)\right] =o(n^{1/2})$. \end{assumption} By Assumption \ref{L-SA-1}.(i) and the definition of $\left\Vert \cdot\right\Vert _{\psi}$, we have \[ \langle v_{g,1},v_{g,2}\rangle_{\psi}=\mathbb{E}\left[ r_{\psi,g}% (Z_{2},\alpha_{o})[v_{g,1},v_{g,2}]\right] \] for any $v_{g,1},v_{g,2}\in\mathcal{V}_{2}$. By Assumption \ref{L-SA-2}.(i), we have \[ \Gamma(\alpha_{o})\left[ v_{h},v_{g}\right] =\mathbb{E}\left[ r_{\psi ,h}(Z_{2},\alpha_{o})[v_{h},v_{g}]\right] \] for any $v_{h}\in\mathcal{V}_{1}$ and any $v_{g}\in\mathcal{V}_{2}$. Suppose that $\mathcal{F}$ is a class of functions of $Z$. 
Let $F$ denote an envelope of $\mathcal{F}$, \[ F(z)\geq\sup_{f\in\mathcal{F}}\left\vert f(z)\right\vert \text{ for any }% z\in\mathcal{Z}% \] where $\mathcal{Z}$ denotes the support of $Z$. For a probability measure $Q$ and a constant $q$, such that $\left\Vert F\right\Vert _{Q,q}>0$ (where $\left\Vert \cdot\right\Vert _{Q,q}$ denotes the $L_{q}$-norm under $Q$), we use $N(\varepsilon\left\Vert F\right\Vert _{Q,q},\mathcal{F},\left\Vert \cdot\right\Vert _{Q,q})$ to denote the minimal number of $\left\Vert \cdot\right\Vert _{Q,q}$-balls of radius $\varepsilon\left\Vert F\right\Vert _{Q,q}$ needed to cover $\mathcal{F}$. The supremum of $N(\varepsilon \left\Vert F\right\Vert _{Q,q},\mathcal{F},\left\Vert \cdot\right\Vert _{Q,q})$ over all finitely-discrete probability measures $Q$, is a uniform entropy number of $\mathcal{F}$. Define \begin{align*} \mathcal{F}_{1,n}^{\ast} & =\left\{ z_{2}\mapsto r_{\psi,h}(z_{2}% ,\alpha_{o})[h-h_{o,n},u_{g_{n}}^{\ast}]:h\in\mathcal{N}_{h,n}\right\} ,\\ \mathcal{F}_{2,n}^{\ast} & =\left\{ z_{2}\mapsto r_{\psi,g}(z_{2}% ,\alpha_{o})[g-g_{o,n},u_{g_{n}}^{\ast}]:g\in\mathcal{N}_{g,n}\right\} , \end{align*} where $h_{o,n}\in\mathcal{H}_{n}$ and $g_{o,n}\in\mathcal{G}_{n}$ are such that $\left\Vert h_{o,n}-h_{o}\right\Vert _{\mathcal{H}}=O(\delta_{1,n}^{\ast })$ and $\left\Vert g_{o,n}-g_{o}\right\Vert _{\mathcal{G}}=O(\delta _{2,n}^{\ast})$. 
\begin{assumption} \label{L-SA-3} (i) $\mathbb{E}\left[ \left\vert r_{\psi,h}(Z_{2},\alpha _{o})[h_{o,n}-h_{o},u_{g_{n}}^{\ast}]\right\vert \right] =o(n^{-1/2})$; (ii) let $F_{1,n}^{\ast}$ denote an envelope of $\mathcal{F}_{1,n}^{\ast}$, then \[ \sup_{Q}N(\varepsilon\left\Vert F_{1,n}^{\ast}\right\Vert _{Q,2}% ,\mathcal{F}_{1,n}^{\ast},L_{2}(Q))\leq(C/\varepsilon)^{CL}\ \text{for any }\varepsilon\in(0,1]; \] (iii) $\mathbb{E}\left[ \left\vert r_{\psi,g}(Z_{2},\alpha_{o})[g_{o,n}% -g_{o},u_{g_{n}}^{\ast}]\right\vert \right] =o(n^{-1/2})$; (iv) let $F_{2,n}^{\ast}$ denote an envelope of $\mathcal{F}_{2,n}^{\ast}$, then \[ \sup_{Q}N(\varepsilon\left\Vert F_{2,n}^{\ast}\right\Vert _{Q,2}% ,\mathcal{F}_{2,n}^{\ast},L_{2}(Q))\leq(C/\varepsilon)^{CK}\ \text{for any }\varepsilon\in(0,1]; \] (v) $\max_{j=1,2}(\sup_{f\in\mathcal{F}_{j,n}^{\ast}}\mathbb{E}\left[ f^{2}\right] +(K+L)\sup_{z_{2}\in\mathcal{Z}_{2}}|F_{j,n}^{\ast}(z_{2}% )|^{2}\log(n)n^{-1})^{1/2}((K+L)\log(n))^{1/2}=o(1)$. \end{assumption} \begin{lemma} \label{L-SA-L1} Under Assumptions \ref{L-SA-1}-\ref{L-SA-3}, Assumption 3.2 in HLR holds. 
\end{lemma} \begin{proof} [Proof of Lemma \ref{L-SA-L1}]By Assumptions \ref{L-SA-1}.(i)-(ii), and the triangle inequality,% \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert n^{-1}\sum_{i=1}^{n}\left[ \begin{array} [c]{c}% \psi(Z_{2,i},g^{\ast},h)-\psi(Z_{2,i},g,h)\\ -\Delta_{\psi}(Z_{2,i},g,h)[\pm\kappa_{n}u_{g_{n}}^{\ast}]-\kappa_{n}% ^{2}r_{\psi,g}(Z_{2,i},g,h)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}] \end{array} \right] \right\vert \leq C\kappa_{n}^{2}n^{-1}\sum_{i=1}^{n}\Lambda _{1,n}(Z_{2,i}) \label{PL-SA-L1-1}% \end{equation} which together with $\mathbb{E}\left[ \Lambda_{1,n}(Z_{2})\right] \leq C$ and the Markov inequality implies that \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert n^{-1}\sum_{i=1}^{n}\left[ \begin{array} [c]{c}% \psi(Z_{2,i},g^{\ast},h)-\psi(Z_{2,i},g,h)\\ -\Delta_{\psi}(Z_{2,i},g,h)[\pm\kappa_{n}u_{g_{n}}^{\ast}]-\kappa_{n}% ^{2}r_{\psi,g}(Z_{2,i},g,h)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}] \end{array} \right] \right\vert =O_{p}(\kappa_{n}^{2}). \label{PL-SA-L1-2}% \end{equation} Similarly, by Assumptions \ref{L-SA-1}.(i)-(ii), and the triangle inequality, \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mathbb{E}\left[ \begin{array} [c]{c}% \psi(Z_{2},g^{\ast},h)-\psi(Z_{2},g,h)\\ -\Delta_{\psi}(Z_{2},g,h)[\pm\kappa_{n}u_{g_{n}}^{\ast}]-\kappa_{n}^{2}% r_{\psi,g}(Z_{2},g,h)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}] \end{array} \right] \right\vert =O(\kappa_{n}^{2}), \label{PL-SA-L1-3}% \end{equation} which together with (\ref{PL-SA-L1-2}) implies that \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \begin{array} [c]{c}% \psi(Z_{2},g^{\ast},h)-\psi(Z_{2},g,h)\\ -\Delta_{\psi}(Z_{2},g,h)[\pm\kappa_{n}u_{g_{n}}^{\ast}]-\kappa_{n}^{2}% r_{\psi,g}(Z_{2},g,h)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}] \end{array} \right\} \right\vert =O_{p}(\kappa_{n}^{2}). 
\label{PL-SA-L1-4}% \end{equation} By Assumptions \ref{L-SA-1}.(iii), the triangle inequality and the Markov inequality,\ \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ r_{\psi,g}% (Z_{2},\alpha)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]-r_{\psi,g}(Z_{2},\alpha _{o})[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]\right\} \right\vert =O_{p}(1) \label{PL-SA-L1-4a}% \end{equation} which together with Assumptions \ref{L-SA-1}.(iv), the triangle inequality and the Markov inequality implies that \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ r_{\psi,g}% (Z_{2},\alpha)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]\right\} \right\vert =O_{p}(1). \label{PL-SA-L1-5}% \end{equation} Combining the results in (\ref{PL-SA-L1-4}) and (\ref{PL-SA-L1-5}), and then applying the triangle inequality, we prove condition (12) of Assumption 3.2.(i) in HLR. By Assumptions \ref{L-SA-2}.(ii), \ref{L-SA-2}.(v)-(vi), the triangle inequality and the Markov inequality,% \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \Delta_{\psi}% (Z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},g_{o},h)[u_{g_{n}}^{\ast }]-r_{\psi,g}(Z_{2},g_{o},h)[g-g_{o},u_{g_{n}}^{\ast}]\right\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L1-6a}% \end{equation} Similarly, by Assumptions \ref{L-SA-2}.(iv)-(vi), the triangle inequality and the Markov inequality,% \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ r_{\psi,g}% (Z_{2},g_{o},h)[g-g_{o},u_{g_{n}}^{\ast}]-r_{\psi,g}(Z_{2},g_{o}% ,h_{o})[g-g_{o},u_{g_{n}}^{\ast}]\right\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L1-6b}% \end{equation} By Assumption \ref{L-SA-3}.(iii), the triangle inequality and the Markov inequality,% \begin{equation} \left\vert \mu_{n}\left\{ r_{\psi,g}(Z_{2},\alpha_{o})[g_{o,n}-g_{o}% ,u_{g_{n}}^{\ast}]\right\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L1-6c}% \end{equation} By Assumptions \ref{L-SA-3}.(iv)-(v), we can use Lemma 22 in Belloni, et. 
al (2016) to show that \begin{equation} \sup_{g\in\mathcal{N}_{g,n}}\left\vert \mu_{n}\left\{ r_{\psi,g}(Z_{2}% ,\alpha_{o})[g-g_{o,n},u_{g_{n}}^{\ast}]\right\} \right\vert =o_{p}% (n^{-1/2}), \label{PL-SA-L1-6d}% \end{equation} which together with (\ref{PL-SA-L1-6c}) implies that \begin{equation} \sup_{g\in\mathcal{N}_{g,n}}\left\vert \mu_{n}\left\{ r_{\psi,g}(Z_{2}% ,\alpha_{o})[g-g_{o},u_{g_{n}}^{\ast}]\right\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L1-6e}% \end{equation} Collecting the results in (\ref{PL-SA-L1-6a}), (\ref{PL-SA-L1-6b}) and (\ref{PL-SA-L1-6e}), we get% \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \Delta_{\psi}% (Z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},g_{o},h)[u_{g_{n}}^{\ast }]\right\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L1-6f}% \end{equation} By Assumptions \ref{L-SA-2}.(iii), \ref{L-SA-2}.(v)-(vi), the triangle inequality and the Markov inequality,% \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \Delta_{\psi}% (Z_{2},g_{o},h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},\alpha_{o})[u_{g_{n}% }^{\ast}]-r_{\psi,h}(Z_{2},\alpha_{o})[h-h_{o},u_{g_{n}}^{\ast}]\right\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L1-6g}% \end{equation} By Assumption \ref{L-SA-3}.(i), the triangle inequality and the Markov inequality,% \begin{equation} \left\vert \mu_{n}\left\{ r_{\psi,h}(Z_{2},\alpha_{o})[h_{o,n}-h_{o}% ,u_{g_{n}}^{\ast}]\right\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L1-6h}% \end{equation} By Assumptions \ref{L-SA-3}.(ii) and \ref{L-SA-3}.(v), we can use Lemma 22 in Belloni, et. 
al (2016) to show that% \begin{equation} \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ r_{\psi,h}(Z_{2}% ,\alpha_{o})[h-h_{o,n},u_{g_{n}}^{\ast}]\right\} \right\vert =o_{p}% (n^{-1/2}), \label{PL-SA-L1-6i}% \end{equation} which together with (\ref{PL-SA-L1-6h}) implies that \begin{equation} \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ r_{\psi,h}(Z_{2}% ,\alpha_{o})[h-h_{o},u_{g_{n}}^{\ast}]\right\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L1-6j}% \end{equation} Collecting the results in (\ref{PL-SA-L1-6g}) and (\ref{PL-SA-L1-6j}), we get% \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \Delta_{\psi}% (Z_{2},g_{o},h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},\alpha_{o})[u_{g_{n}% }^{\ast}]\right\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L1-6k}% \end{equation} Combining the results in (\ref{PL-SA-L1-6f}) and (\ref{PL-SA-L1-6k}), and then applying the triangle inequality, we immediately prove condition (13) of Assumption 3.2.(i) in HLR. By Assumptions \ref{L-SA-1}.(ii)-(iv), \begin{equation} \mathbb{E}\left[ \psi(Z_{2},g^{\ast},h)-\psi(Z_{2},g,h)\right] =\pm \kappa_{n}\mathbb{E}\left[ \Delta_{\psi}(Z_{2},g,h)[u_{g_{n}}^{\ast}]\right] +O(\kappa_{n}^{2}), \label{PL-SA-L1-10}% \end{equation} uniformly over $\alpha\in\mathcal{N}_{n}$. 
As $\mathbb{E}\left[ \Delta_{\psi }(Z_{2},g_{o},h_{o})[u_{g_{n}}^{\ast}]\right] =0$, by Assumptions \ref{L-SA-2}.(ii)-(iv) and \ref{L-SA-2}.(vi) \begin{align} & \mathbb{E}\left[ \Delta_{\psi}(Z_{2},g,h)[u_{g_{n}}^{\ast}]\right] \nonumber\\ & =\mathbb{E}\left[ \Delta_{\psi}(Z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi }(Z_{2},g_{o},h)[u_{g_{n}}^{\ast}]-r_{\psi,g}(Z_{2},g_{o},h)[g-g_{o},u_{g_{n}% }^{\ast}]\right] \nonumber\\ & +\mathbb{E}\left[ \Delta_{\psi}(Z_{2},g_{o},h)[u_{g_{n}}^{\ast}% ]-\Delta_{\psi}(Z_{2},g_{o},h_{o})[u_{g_{n}}^{\ast}]-r_{\psi,h}(Z_{2}% ,g_{o},h_{o})[h-h_{o},u_{g_{n}}^{\ast}]\right] \nonumber\\ & +\mathbb{E}\left[ r_{\psi,g}(Z_{2},g_{o},h)[g-g_{o},u_{g_{n}}^{\ast }]-r_{\psi,g}(Z_{2},g_{o},h_{o})[g-g_{o},u_{g_{n}}^{\ast}]\right] \nonumber\\ & +\mathbb{E}\left[ r_{\psi,g}(Z_{2},g_{o},h_{o})[g-g_{o},u_{g_{n}}^{\ast }]\right] +\mathbb{E}\left[ r_{\psi,h}(Z_{2},g_{o},h_{o})[h-h_{o},u_{g_{n}% }^{\ast}]\right] \nonumber\\ & =\mathbb{E}\left[ r_{\psi,g}(Z_{2},\alpha_{o})[g-g_{o},u_{g_{n}}^{\ast }]\right] +\mathbb{E}\left[ r_{\psi,h}(Z_{2},\alpha_{o})[h-h_{o},u_{g_{n}% }^{\ast}]\right] +o(n^{-1/2})\nonumber\\ & =\langle g-g_{o},u_{g_{n}}^{\ast}\rangle_{\psi}+\Gamma(\alpha_{o})\left[ h-h_{o},u_{g_{n}}^{\ast}\right] +o(n^{-1/2}) \label{PL-SA-L1-11}% \end{align} where the last equality is by the definitions of the inner product $\langle\cdot,\cdot\rangle_{\psi}$ and the functional $\Gamma(\alpha _{o})\left[ \cdot,\cdot\right] $.\ By Assumption \ref{L-SA-1}.(v), (\ref{PL-SA-L1-10}), (\ref{PL-SA-L1-11}) and the definition of $K_{\psi}% (g,h)$, we have% \begin{equation} K_{\psi}(g,h)-K_{\psi}(g^{\ast},h)=\mp\kappa_{n}\left[ \langle g-g_{o}% ,u_{g_{n}}^{\ast}\rangle_{\psi}+\Gamma(\alpha_{o})\left[ h-h_{o,n},u_{g_{n}% }^{\ast}\right] \right] +O(\kappa_{n}^{2}). 
\label{PL-SA-L1-12}% \end{equation} By the definition of $||\cdot||_{\psi}$ and Assumption \ref{L-SA-1}.(iv), \begin{equation} \frac{||g^{\ast}-g_{o}||_{\psi}^{2}-||g-g_{o}||_{\psi}^{2}}{2}=\langle g-g_{o},\pm\kappa_{n}u_{g_{n}}^{\ast}\rangle_{\psi}+O(\kappa_{n}^{2}). \label{PL-SA-L1-13}% \end{equation} Collecting the results in (\ref{PL-SA-L1-12}) and (\ref{PL-SA-L1-13}), we immediately prove Assumption 3.2.(ii) in HLR. \end{proof} We next provide sufficient conditions for Assumptions 3.2 and 3.4 in HLR when the criterion function in the second-step M estimation takes the following form% \begin{equation} \psi(Z_{2},g,h)=\tau(Z_{1},h)\psi^{\ast}(Z_{2},g,h). \label{L-SA-D1}% \end{equation} We will assume that Assumptions \ref{L-SA-1}.(i) and \ref{L-SA-2}.(i) hold for $\psi^{\ast}(Z_{2},g,h)$. Define% \[ \Delta_{\psi}^{\ast}(z_{2},\alpha)[v_{g,1}]=\left. \frac{\partial\psi^{\ast }(z_{2},g+\tau v_{g,1},h)}{\partial\tau}\right\vert _{\tau=0}\text{ and }r_{\psi,g}^{\ast}(z_{2},\alpha)[v_{g,1},v_{g,2}]=\left. \frac{\partial \Delta_{\psi}^{\ast}(z_{2},g+\tau v_{g,2},h)[v_{g,1}]}{\partial\tau }\right\vert _{\tau=0}, \] for any $z_{2}\in\mathcal{Z}_{2}$, any $\alpha\in\mathcal{N}_{\alpha}$ and any $v_{g,1},v_{g,2}\in\mathcal{V}_{2}$. Then we have \[ \Delta_{\psi}(z_{2},\alpha)[v_{g,1}]=\tau(z_{1},h)\Delta_{\psi}^{\ast}% (z_{2},\alpha)[v_{g,1}]\text{ and }r_{\psi,g}(z_{2},\alpha)[v_{g,1}% ,v_{g,2}]=\tau(z_{1},h)r_{\psi,g}^{\ast}(z_{2},\alpha)[v_{g,1},v_{g,2}] \] for any $\alpha\in\mathcal{N}_{\alpha}$ and any $v_{g,1},v_{g,2}\in \mathcal{V}_{2}$. Define% \[ r_{\psi,h}(z_{2},\alpha)[v_{h},v_{g}]=\tau(z_{1},h)r_{\psi,h}^{\ast}% (z_{2},\alpha)[v_{h},v_{g}], \] where \[ r_{\psi,h}^{\ast}(z_{2},\alpha)[v_{h},v_{g}]=\left. \frac{\partial \Delta_{\psi}^{\ast}(z_{2},g,h+\tau v_{h})[v_{g}]}{\partial\tau}\right\vert _{\tau=0}. \] Let $\xi_{n}$ denote a non-decreasing real positive sequence, and $\delta_{\tau,n}^{\ast}$ denote a real positive sequence. 
\begin{assumption} \label{L-SA-4} (i) $\sup_{z_{1}\in\mathcal{Z}_{1},h\in\mathcal{N}_{h,n}}\left[ \left\vert \tau(z_{1},h)\right\vert +\left\vert \tau(z_{1},h_{o})\right\vert \right] \leq C$; (ii) Assumptions \ref{L-SA-1}.(i)-(ii) and \ref{L-SA-1}.(v) hold; (iii) equation (19) in HLR holds; (iv) $\Delta_{\psi}^{\ast}% (z_{2},\alpha)[v_{g}]$ satisfies Assumption \ref{L-SA-2}% .(i);\ (v)\ Assumptions \ref{L-SA-2}.(ii) and \ref{L-SA-2}.(v)-(vi) hold;\ (vi)\ $\sup_{z_{1}\in\mathcal{Z}_{1}}\mathbb{E}\left[ \left. (\Delta_{\psi}^{\ast}(Z_{2},\alpha_{o})[u_{g_{n}}^{\ast}])^{2}\right\vert Z_{1}=z_{1}\right] \leq\xi_{n}^{2}$; (vii) \[ \sup_{h\in\mathcal{N}_{h,n}}n^{-1}\sum_{i=1}^{n}(\tau(Z_{1,i},h)-\tau (Z_{1,i},h_{o}))^{2}=O_{p}(\delta_{\tau,n}^{\ast}) \] where $\delta_{\tau,n}^{\ast}\xi_{n}^{2}=o(1)$. \end{assumption} \begin{assumption} \label{L-SA-5} (i) there exists $\Lambda_{6,n}(z_{2},\alpha)$ such that for any $\alpha\in\mathcal{N}_{n}$ \[ \left\vert \tau(z_{1},h)\left( r_{\psi,g}^{\ast}(z_{2},g_{o},h)[g-g_{o}% ,u_{g_{n}}^{\ast}]-r_{\psi,g}^{\ast}(z_{2},\alpha_{o})[g-g_{o},u_{g_{n}}% ^{\ast}]\right) \right\vert \leq\Lambda_{6,n}(z_{2},\alpha); \] (ii) there exists $\Lambda_{7,n}(z_{2},\alpha)$ such that for any $\alpha \in\mathcal{N}_{n}$% \[ \left\vert \tau(z_{1},h)\left( \Delta_{\psi}^{\ast}(z_{2},g_{o},h)[u_{g_{n}% }^{\ast}]-\Delta_{\psi}^{\ast}(z_{2},\alpha_{o})[u_{g_{n}}^{\ast}]-r_{\psi ,h}^{\ast}(z_{2},\alpha_{o})[h-h_{o},u_{g_{n}}^{\ast}]\right) \right\vert \leq\Lambda_{7,n}(z_{2},\alpha); \] (iii) \[ \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mathbb{E}\left[ (\tau(Z_{1}% ,h)-\tau(Z_{1},h_{o}))r_{\psi,h}^{\ast}(Z_{2},\alpha_{o})[h-h_{o},u_{g_{n}% }^{\ast}]\right] \right\vert =o(n^{-1/2}); \] (iv)% \[ \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mathbb{E}\left[ (\tau (Z_{1},h)-\tau(Z_{1},h_{o}))r_{\psi,g}^{\ast}(Z_{2},\alpha_{o})[g-g_{o}% ,u_{g_{n}}^{\ast}]\right] \right\vert =o(n^{-1/2}); \] (v) there exists $\Lambda_{8,n}(z_{2})$ with $\mathbb{E}\left[ \Lambda 
_{8,n}(Z_{2})\right] \leq C$% \[ \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \tau(z_{1},h)(r_{\psi,g}^{\ast }(z_{2},\alpha)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]-r_{\psi,g}^{\ast}% (z_{2},\alpha_{o})[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}])\right\vert \leq \Lambda_{8,n}(z_{2}); \] (vi) $\mathbb{E}\left[ \left\vert r_{\psi,g}^{\ast}(Z_{2},\alpha _{o})[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]\right\vert \right] \leq C$; (vii) $\max_{j=6,7}\sup_{\alpha\in\mathcal{N}_{n}}n^{-1}\sum_{i=1}^{n}\Lambda _{j,n}(Z_{2,i},\alpha)=o_{p}(n^{-1/2})$; (viii) $\max_{j=6,7}\sup_{\alpha \in\mathcal{N}_{n}}\mathbb{E}\left[ \Lambda_{j,n}(Z_{2},\alpha)\right] =o(n^{-1/2})$. \end{assumption} Define \begin{align*} \mathcal{F}_{3,n}^{\ast} & =\left\{ z_{2}\mapsto\tau(z_{1},h)r_{\psi ,h}^{\ast}(z_{2},\alpha_{o})[h-h_{o,n},u_{g_{n}}^{\ast}]:h\in\mathcal{N}% _{h,n}\right\} ,\\ \mathcal{F}_{4,n}^{\ast} & =\left\{ z_{2}\mapsto\tau(z_{1},h)r_{\psi ,g}^{\ast}(z_{2},\alpha_{o})[g-g_{o,n},u_{g_{n}}^{\ast}]:h\in\mathcal{N}% _{h,n},g\in\mathcal{N}_{g,n}\right\} . 
\end{align*} \begin{assumption} \label{L-SA-6} (i) $\mathbb{E}[|r_{\psi,h}^{\ast}(Z_{2},\alpha_{o}% )[h_{o,n}-h_{o},u_{g_{n}}^{\ast}]|]=o(n^{-1/2})$; (ii) let $F_{3,n}^{\ast}$ denote an envelope of $\mathcal{F}_{3,n}^{\ast}$, then \[ \sup_{Q}N(\varepsilon\left\Vert F_{3,n}^{\ast}\right\Vert _{Q,2}% ,\mathcal{F}_{3,n}^{\ast},L_{2}(Q))\leq(C/\varepsilon)^{CL}\ \text{for any }\varepsilon\in(0,1]; \] (iii) $\mathbb{E}\left[ \left\vert r_{\psi,g}^{\ast}(Z_{2},\alpha _{o})[g_{o,n}-g_{o},u_{g_{n}}^{\ast}]\right\vert \right] =o(n^{-1/2})$; (iv) let $F_{4,n}^{\ast}$ denote an envelope of $\mathcal{F}_{4,n}^{\ast}$, then \[ \sup_{Q}N(\varepsilon\left\Vert F_{4,n}^{\ast}\right\Vert _{Q,2}% ,\mathcal{F}_{4,n}^{\ast},L_{2}(Q))\leq(C/\varepsilon)^{C(L+K)}\ \text{for any }\varepsilon\in(0,1]; \] (v) $\max_{j=3,4}(\sup_{f\in\mathcal{F}_{j,n}^{\ast}}\mathbb{E}\left[ f^{2}\right] +(K+L)\sup_{z_{2}\in\mathcal{Z}_{2}}|F_{j,n}^{\ast}(z_{2}% )|\log(n)n^{-1})^{1/2}((K+L)\log(n))^{1/2}=o(1)$. \end{assumption} By definition, we have $\langle v_{g,1},v_{g,2}\rangle_{\psi}=\mathbb{E}% \left[ \tau(Z_{1},h_{o})r_{\psi,g}^{\ast}(Z_{2},\alpha_{o})[v_{g,1}% ,v_{g,2}]\right] $ for any $v_{g,1},v_{g,2}\in\mathcal{V}_{2}$. Moreover, by (19) in HLR, \[ \Gamma(\alpha_{o})\left[ v_{h},v_{g}\right] =\mathbb{E}\left[ \tau (Z_{1},h_{o})r_{\psi,h}^{\ast}(Z_{2},\alpha_{o})[v_{h},v_{g}]\right] \] for any $v_{h}\in\mathcal{V}_{1}$ and any $v_{g}\in\mathcal{V}_{2}$. \begin{lemma} \label{L-SA-L2} Under Assumptions \ref{L-SA-4}-\ref{L-SA-6}, condition (13) of Assumption 3.2, Assumption 3.2.(ii) and Assumption 3.4 in HLR hold. 
\end{lemma} \begin{proof} [Proof of Lemma \ref{L-SA-L2}]By Assumptions \ref{L-SA-1}.(i)-(ii), we can use the same arguments in the proof of Lemma \ref{L-SA-L1} to show that \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \begin{array} [c]{c}% \psi(Z_{2},g^{\ast},h)-\psi(Z_{2},g,h)\\ -\Delta_{\psi}(Z_{2},g,h)[\pm\kappa_{n}u_{g_{n}}^{\ast}]-\kappa_{n}^{2}% r_{\psi,g}(Z_{2},g,h)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}] \end{array} \right\} \right\vert =O_{p}(\kappa_{n}^{2}). \label{PL-SA-L2-0a}% \end{equation} By Assumptions \ref{L-SA-5}.(v), \ref{L-SA-5}.(vii)-(viii), the triangle inequality and the Markov inequality,\ \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \tau(Z_{1}% ,h)(r_{\psi,g}^{\ast}(Z_{2},\alpha)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}% ]-r_{\psi,g}^{\ast}(Z_{2},\alpha_{o})[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast }])\right\} \right\vert =O_{p}(1). \label{PL-SA-L2-0b}% \end{equation} By Assumptions \ref{L-SA-4}.(i) and \ref{L-SA-5}.(vi), the triangle inequality and the Markov inequality, which together with (\ref{PL-SA-L2-0b}) and the triangle inequality implies that \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \tau(Z_{1}% ,h)r_{\psi,g}^{\ast}(Z_{2},\alpha_{o})[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast }]\right\} \right\vert =O_{p}(1). \label{PL-SA-L2-0c}% \end{equation} Combining the results in (\ref{PL-SA-L2-0a})-(\ref{PL-SA-L2-0c}), and then applying the triangle inequality, we prove condition (13) of Assumption 3.2.(i) in HLR. By Assumption \ref{L-SA-5}.(v), \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mathbb{E}\left[ \tau (Z_{1},h)(r_{\psi,g}^{\ast}(Z_{2},\alpha)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast }]-r_{\psi,g}^{\ast}(Z_{2},\alpha_{o})[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast }])\right] \right\vert =O(1). 
\label{PL-SA-L2-0d}% \end{equation} By Assumptions \ref{L-SA-4}.(i) and \ref{L-SA-5}.(vi), \begin{equation} \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mathbb{E}\left[ (\tau(Z_{1}% ,h)-\tau(Z_{1},h_{o}))r_{\psi,g}^{\ast}(Z_{2},\alpha_{o})[u_{g_{n}}^{\ast },u_{g_{n}}^{\ast}]\right] \right\vert =O(1) \label{PL-SA-L2-0e}% \end{equation} and \begin{equation} \left\vert \mathbb{E}\left[ \tau(Z_{1},h_{o})r_{\psi,g}^{\ast}(Z_{2}% ,\alpha_{o})[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]\right] \right\vert =O(1), \label{PL-SA-L2-0f}% \end{equation} which together with (\ref{PL-SA-L2-0d}) and the triangle inequality implies that \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mathbb{E}\left[ r_{\psi,g}% (Z_{2},\alpha)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]\right] \right\vert =O(1). \label{PL-SA-L2-0g}% \end{equation} By Assumptions \ref{L-SA-1}.(ii), (\ref{PL-SA-L2-0g}) and the triangle inequality, \begin{equation} \mathbb{E}\left[ \tau(Z_{1},h)\left[ \psi^{\ast}(Z_{2},g^{\ast}% ,h)-\psi^{\ast}(Z_{2},g,h)\right] \right] =\pm\kappa_{n}\mathbb{E}\left[ \tau(Z_{1},h)\Delta_{\psi}^{\ast}(Z_{2},g,h)[u_{g_{n}}^{\ast}]\right] +O(\kappa_{n}^{2}), \label{PL-SA-L2-1}% \end{equation} uniformly over $\alpha\in\mathcal{N}_{n}$.\ By $\mathbb{E}[\tau(Z_{1}% ,h)\Delta_{\psi}^{\ast}(Z_{2},\alpha)[u_{g_{n}}^{\ast}]]=0$,\ Assumptions \ref{L-SA-4}.(v), \ref{L-SA-5}.(i)-(iv) and \ref{L-SA-5}.(viii), \begin{align} & \mathbb{E}\left[ \tau(Z_{1},h)\Delta_{\psi}^{\ast}(Z_{2},g,h)[u_{g_{n}% }^{\ast}]\right] \nonumber\\ & =\mathbb{E}\left[ \tau(Z_{1},h)\Delta_{\psi}^{\ast}(Z_{2},g_{o}% ,h)[u_{g_{n}}^{\ast}]\right] \nonumber\\ & \text{ \ \ \ }+\mathbb{E}\left[ \tau(Z_{1},h)r_{\psi,g}^{\ast}(Z_{2}% ,g_{o},h)[g-g_{o},u_{g_{n}}^{\ast}]\right] +o(n^{-1/2})\nonumber\\ & =\mathbb{E}\left[ \tau(Z_{1},h)r_{\psi,h}^{\ast}(z_{2},\alpha_{o}% )[h-h_{o},u_{g_{n}}^{\ast}]\right] \nonumber\\ & \text{ \ \ \ }+\mathbb{E}\left[ \tau(Z_{1},h)r_{\psi,g}^{\ast}% (Z_{2},\alpha_{o})[g-g_{o},u_{g_{n}}^{\ast}]\right] 
+o(n^{-1/2})\nonumber\\ & =\mathbb{E}\left[ r_{\psi,h}(z_{2},\alpha_{o})[h-h_{o},u_{g_{n}}^{\ast }]\right] +\mathbb{E}\left[ r_{\psi,g}(Z_{2},\alpha_{o})[g-g_{o},u_{g_{n}% }^{\ast}]\right] +o(n^{-1/2})\nonumber\\ & =\Gamma(\alpha_{o})\left[ h-h_{o},u_{g_{n}}^{\ast}\right] +\langle g-g_{o},u_{g_{n}}^{\ast}\rangle_{\psi}+o(n^{-1/2}), \label{PL-SA-L2-2}% \end{align} where the last equality is by the definitions of the inner product $\langle\cdot,\cdot\rangle_{\psi}$ and the functional $\Gamma(\alpha _{o})\left[ \cdot,\cdot\right] $. By Assumption \ref{L-SA-1}.(v), (\ref{PL-SA-L2-1}), (\ref{PL-SA-L2-2}) and the definition of $K_{\psi}(g,h)$, we have% \begin{equation} K_{\psi}(g,h)-K_{\psi}(g^{\ast},h)=\mp\kappa_{n}\left[ \langle g-g_{o}% ,u_{g_{n}}^{\ast}\rangle_{\psi}+\Gamma(\alpha_{o})\left[ h-h_{o,n},u_{g_{n}% }^{\ast}\right] \right] +O(\kappa_{n}^{2}). \label{PL-SA-L2-3}% \end{equation} By the definition of $||\cdot||_{\psi}$, Assumptions \ref{L-SA-4}.(i) and \ref{L-SA-5}.(vi), \begin{equation} \frac{||g^{\ast}-g_{o}||_{\psi}^{2}-||g-g_{o}||_{\psi}^{2}}{2}=\langle g-g_{o},\pm\kappa_{n}u_{g_{n}}^{\ast}\rangle_{\psi}+O(\kappa_{n}^{2}). \label{PL-SA-L2-4}% \end{equation} Collecting the results in (\ref{PL-SA-L2-3}) and (\ref{PL-SA-L2-4}), we immediately prove Assumption 3.2.(ii) in HLR. We next verify Assumption 3.4 in HLR.\ Assumptions 3.4.(i)-(ii) are assumed directly. 
By definition,% \begin{align} & \Delta_{\psi}(z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(z_{2}% ,g_{o},h)[u_{g_{n}}^{\ast}]\nonumber\\ & =\tau(z_{1},h)r_{\psi,g}^{\ast}(z_{2},\alpha_{o})[g-g_{o,n},u_{g_{n}}% ^{\ast}]\nonumber\\ & +\tau(z_{1},h)r_{\psi,g}^{\ast}(z_{2},\alpha_{o})[g_{o,n}-g_{o},u_{g_{n}% }^{\ast}]\nonumber\\ & +\left[ \Delta_{\psi}(z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}% (z_{2},g_{o},h)[u_{g_{n}}^{\ast}]-r_{\psi,g}(z_{2},g_{o},h)[g-g_{o},u_{g_{n}% }^{\ast}]\right] \nonumber\\ & +\tau(z_{1},h)\left( r_{\psi,g}^{\ast}(z_{2},g_{o},h)[g-g_{o},u_{g_{n}% }^{\ast}]-r_{\psi,g}^{\ast}(z_{2},\alpha_{o})[g-g_{o},u_{g_{n}}^{\ast }]\right) . \label{PL-SA-L2-5}% \end{align} By Assumptions \ref{L-SA-4}.(v), \ref{L-SA-2}.(v)-(vi), \ref{L-SA-5}.(i) and \ref{L-SA-5}.(vii)-(viii), and the Markov inequality,% \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \begin{array} [c]{c}% \Delta_{\psi}(Z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},g_{o}% ,h)[u_{g_{n}}^{\ast}]\\ -r_{\psi,g}(Z_{2},g_{o},h)[g-g_{o},u_{g_{n}}^{\ast}] \end{array} \right\} \right\vert =o_{p}(n^{-1/2}), \label{PL-SA-L2-6}% \end{equation} and% \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \tau(Z_{1},h)\left( % \begin{array} [c]{c}% r_{\psi,g}^{\ast}(Z_{2},g_{o},h)[g-g_{o},u_{g_{n}}^{\ast}]\\ -r_{\psi,g}^{\ast}(Z_{2},g_{o},h_{o})[g-g_{o},u_{g_{n}}^{\ast}] \end{array} \right) \right\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L2-7}% \end{equation} By Assumptions \ref{L-SA-4}.(i), \ref{L-SA-6}.(iii) and the Markov inequality,% \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\{ \tau(Z_{1},h)r_{\psi ,g}^{\ast}(Z_{2},\alpha_{o})[g_{o,n}-g_{o},u_{g_{n}}^{\ast}]\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L2-8}% \end{equation} By Assumptions \ref{L-SA-6}.(iv)-(v), we can use Lemma 22 in Belloni, et. 
al (2016) to show that% \begin{equation} \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ \tau(Z_{1},h)r_{\psi ,g}^{\ast}(Z_{2},\alpha_{o})[g-g_{o,n},u_{g_{n}}^{\ast}]\right\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L2-9}% \end{equation} Collecting the results in (\ref{PL-SA-L2-5})-(\ref{PL-SA-L2-9}), and then applying the triangle inequality, we get% \begin{equation} \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ \Delta_{\psi}% (Z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},g_{o},h)[u_{g_{n}}^{\ast }]\right\} \right\vert =o_{p}(n^{-1/2}), \label{PL-SA-L2-10}% \end{equation} which proves condition (20) in Assumption 3.4.(iii). By definition, \begin{align} & \tau(z_{1},h)(\Delta_{\psi}^{\ast}(z_{2},g_{o},h)[u_{g_{n}}^{\ast}% ]-\Delta_{\psi}^{\ast}(z_{2},g_{o},h_{o})[u_{g_{n}}^{\ast}])\nonumber\\ & =\tau(z_{1},h)r_{\psi,h}^{\ast}(z_{2},\alpha_{o})[h-h_{o,n},u_{g_{n}}% ^{\ast}]\nonumber\\ & +\tau(z_{1},h)r_{\psi,h}^{\ast}(z_{2},\alpha_{o})[h_{o,n}-h_{o},u_{g_{n}% }^{\ast}]\nonumber\\ & +\tau(z_{1},h)(\Delta_{\psi}^{\ast}(z_{2},g_{o},h)[u_{g_{n}}^{\ast}% ]-\Delta_{\psi}^{\ast}(z_{2},\alpha_{o})[u_{g_{n}}^{\ast}]-r_{\psi,h}^{\ast }(z_{2},\alpha_{o})[h-h_{o},u_{g_{n}}^{\ast}]). \label{PL-SA-L2-11}% \end{align} By Assumptions \ref{L-SA-5}.(ii), \ref{L-SA-5}.(vii)-(viii), the Markov inequality and the triangle inequality,% \begin{equation} \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ \begin{array} [c]{c}% \tau(Z_{1},h)(\Delta_{\psi}^{\ast}(Z_{2},g_{o},h)[u_{g_{n}}^{\ast}% ]-\Delta_{\psi}^{\ast}(Z_{2},\alpha_{o})[u_{g_{n}}^{\ast}]\\ -r_{\psi,h}^{\ast}(Z_{2},\alpha_{o})[h-h_{o},u_{g_{n}}^{\ast}]) \end{array} \right\} \right\vert =o_{p}(n^{-1/2}). 
\label{PL-SA-L2-12}% \end{equation} By Assumptions \ref{L-SA-4}.(i), \ref{L-SA-6}.(i), the Markov inequality and the triangle inequality,% \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\{ \tau(Z_{1},h)r_{\psi ,h}^{\ast}(Z_{2},\alpha_{o})[h_{o,n}-h_{o},u_{g_{n}}^{\ast}]\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L2-13}% \end{equation} By Assumptions \ref{L-SA-6}.(ii)-(iii), we can use Lemma 22 in Belloni et al.\ (2016) to show that% \begin{equation} \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\{ \tau(Z_{1},h)r_{\psi,h}% ^{\ast}(Z_{2},\alpha_{o})[h-h_{o,n},u_{g_{n}}^{\ast}]\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L2-14}% \end{equation} Collecting the results in (\ref{PL-SA-L2-11})-(\ref{PL-SA-L2-14}), and then applying the triangle inequality, we get% \begin{equation} \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ \tau(Z_{1}% ,h)(\Delta_{\psi}^{\ast}(Z_{2},g_{o},h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}^{\ast }(Z_{2},g_{o},h_{o})[u_{g_{n}}^{\ast}])\right\} \right\vert =o_{p}(n^{-1/2}), \label{PL-SA-L2-15}% \end{equation} which proves condition (21) in Assumption 3.4.(iii). Finally, Assumption 3.4.(iv) in HLR follows from Assumptions \ref{L-SA-4}.(vi)-(vii). \end{proof} \section{Some Auxiliary Lemmas for Theorem 5.1 of HLR} For the completeness of this section, we list the sufficient conditions of Theorem 5.1 in HLR. To facilitate the presentation, we first review some notation introduced in Section 5 and Appendix D of HLR. Recall that the basis functions used in the first-step and second-step M estimations are the $L\times 1$ vector $R(x)$ and the $K\times1$ vector $P(\varepsilon)$, respectively. For $j=1,2$, we define $\upsilon_{j,K}=\sup_{\varepsilon\in\mathcal{E}_{\eta}% }\left\Vert \partial^{j}P(\varepsilon)^{\prime}\beta_{o,K}\right\Vert $, where $\mathcal{E}_{\eta}=[a-\eta,b+\eta]$ for some $a<b$ and $\eta>0$, and $\beta_{o,K}\in% %TCIMACRO{\U{211d} }% %BeginExpansion \mathbb{R} %EndExpansion ^{K}$ is defined in Assumption \ref{L-A-1}.(iii) below. 
Let $\mathcal{N}% _{g,n}=\{g\in\mathcal{G}_{n}:\left\Vert g-g_{o}\right\Vert _{2}\leq \delta_{2,n}^{\ast}\log(\log(n))\}$ denote the local neighborhood of $g_{o}$, where $\mathcal{G}_{n}$ denotes the sieve space for estimating $g_{o}$, $\delta_{2,n}^{\ast}=K^{1/2}n^{-1/2}+K^{-\rho_{g}}+\upsilon_{1,K}\delta _{h,n}^{\ast}$ and $\delta_{h,n}^{\ast}=L^{1/2}n^{-1/2}+L^{-\rho_{h}}$. For any column vector $a$, let $\Vert a\Vert$ denote its $\ell_{2}$-norm; for any square matrix $A$, the operator norm is denoted by $\Vert A\Vert$; $\omega_{\max}(A)$ and $\omega_{\min}(A)$ denote the largest and smallest eigenvalues of a square matrix $A$, respectively. We use $C$ to denote some generic finite positive constant larger than 1.\ For $d$ a nonnegative integer, let $\left\vert g\right\vert _{d}=\max_{|\tau|\leq d}\sup_{\varepsilon\in\mathcal{E}% }\left\vert \partial^{\tau}g(\varepsilon)\right\vert $ for any $g\in \mathcal{G}$ where $\mathcal{G}$ is the function space containing $g_{o}$. Let $\left\Vert \cdot\right\Vert _{\infty}$ denote the uniform norm. For any function $f$, $\mu_{n}(f)=n^{-1}\sum_{i=1}^{n}\left[ f(Z_{i})-\mathbb{E}% \left[ f(Z_{i})\right] \right] $ denotes the empirical process indexed by $f$. \begin{assumption} \label{L-A-0} (i) The data $\left\{ y_{i},x_{i},s_{i}\right\} _{i=1}^{n}$ are i.i.d.; (ii) $\mathbb{E}\left[ \left. \varepsilon_{i}^{4}\right\vert x_{i}\right] <C$ and $\mathbb{E}\left[ \left. \varepsilon_{i}^{2}\right\vert x_{i}\right] >C^{-1}$; (iii)\ there exist $\rho_{h}>0$ and $\gamma_{o,L}\in% %TCIMACRO{\U{211d} }% %BeginExpansion \mathbb{R} %EndExpansion ^{L}$ such that% \[ \left\Vert h_{o,L}-h_{o}\right\Vert _{\infty}=O(L^{-\rho_{h}}), \] where $h_{o,L}\left( \cdot\right) \equiv R\left( \cdot\right) ^{\prime }\gamma_{o,L}$; (iv) the eigenvalues of $Q_{L}$ are between $C^{-1}$ and $C$ for all $L$; (v) there exists a nondecreasing sequence $\zeta_{L}$ such that $\sup_{x\in\mathcal{X}}\left\Vert R(x)\right\Vert \leq\zeta_{L}$. \end{assumption} \begin{assumption} \label{L-A-1} (i) $\mathbb{E}[\left. 
u_{i}^{4}\right\vert \varepsilon_{i}]<C$ and $\mathbb{E}[\left. u_{i}^{2}\right\vert \varepsilon_{i}]>C^{-1}$; (ii) $g_{o}(\varepsilon)$ is twice continuously differentiable; (iii) there exist $\rho_{g}>0$ and $\beta_{o,K}\in% %TCIMACRO{\U{211d} }% %BeginExpansion \mathbb{R} %EndExpansion ^{K}$ such that% \[ \left\vert g_{o,K}-g_{o}\right\vert _{d}=O(K^{-\rho_{g}}), \] where $g_{o,K}\left( \cdot\right) =P\left( \cdot\right) ^{\prime}% \beta_{o,K}$ and $d=1$; (iv) the eigenvalues of $Q_{K}$ are between $C^{-1}$ and $C$ for all $K$; (v) for $j=0,1,2$, there exists a nondecreasing sequence $\xi_{j,K}$ such that $\sup_{\varepsilon\in\mathcal{E}_{\eta}}\left\Vert \partial^{j}P(\varepsilon)\right\Vert \leq\xi_{j,K}$. \end{assumption} \begin{assumption} \label{L-A-2} (i)\ $||v_{g_{n}}^{\ast}||_{2}\geq C$ for all $n$;\ (ii) the functional $\rho(\cdot)$ satisfies \[ \sup_{g\in\mathcal{N}_{g,n}}\left\vert \frac{\rho(g)-\rho(g_{o})-\partial \rho(g_{o})[g-g_{o}]}{\left\Vert v_{n}^{\ast}\right\Vert _{sd}}\right\vert =o(n^{-1/2}); \] (iii) $\left\vert \left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-1}\partial \rho(g_{o})[g_{o,n}-g_{o}]\right\vert =o(n^{-1/2})$; (iv) $\sup_{g\in \mathcal{N}_{g,n}}\left\Vert \partial\rho(g)[P]-\partial\rho(g_{o}% )[P]\right\Vert =o(1)$. \end{assumption} \begin{assumption} \label{L-A-3} The following conditions hold: (i)\ $n^{-1/2}(K+L)^{1/2}(\xi_{0,K}+\zeta_{L})(\log(n))^{1/2}=o(1)$; (ii)\ $n^{-1}(L\xi_{1,K}^{2}\log(n)+\zeta_{L}\xi_{1,K})=o(1)$; (iii)\ $n^{-1/2}\zeta_{L}(L\xi_{2,K}+L^{1/2}\xi_{1,K})(n^{-1/2}K^{1/2}% +K^{-\rho_{g}}+\upsilon_{1,K}n^{-1/2}L^{1/2})\log(n)=o(1)$; (iv)\ $n^{-1/2}\zeta_{L}(L+L^{1/2}\upsilon_{1,K}+L\upsilon_{2,K})\log(n)=o(1)$; (v)\ $nL^{1-2\rho_{h}}+K^{-\rho_{g}}=o(1)$. \end{assumption} \begin{assumption} \label{L-A-4} The following conditions hold: (i) $||v_{g_{n}}^{\ast}||_{2}\leq C$ for all $n$; (ii) $(n^{-1}K\xi_{1,K}^{2}+(\zeta_{L}^{2}+\xi_{0,K}^{2}+\xi_{1,K}% ^{2})K^{-2\rho_{g}})\log(n)=o(1)$; (iii) $n^{-1}(\zeta_{L}^{2}+\xi_{0,K}^{2}+\xi_{1,K}^{2})\upsilon_{1,K}% ^{2}L\log(n)=o(1)$. 
\end{assumption} \begin{lemma} \label{AP-AL-1} Under Assumptions \ref{L-A-0}, \ref{L-A-1}.(iv)-(v), \ref{L-A-3}.(i) and \ref{L-A-3}.(v), we have% \[ \left\Vert \widehat{Q}_{n,K}-Q_{K}\right\Vert =O_{p}(\xi_{1,K}^{2}\delta _{h,n}^{\ast2}+\xi_{1,K}\delta_{h,n}^{\ast}+n^{-1/2}\xi_{0,K}(\log K)^{1/2}), \] where $\delta_{h,n}^{\ast}=L^{1/2}n^{-1/2}+L^{-\rho_{h}}$. \end{lemma} \begin{proof} [Proof of Lemma \ref{AP-AL-1}]Let $B_{K}=\{ \lambda_{K}\in% %TCIMACRO{\U{211d} }% %BeginExpansion \mathbb{R} %EndExpansion ^{K}:\lambda_{K}^{\prime}\lambda_{K}=1\}$. Under Assumptions \ref{L-A-0}.(i), \ref{L-A-1}.(iv)-(v) and \ref{L-A-3}.(i), we can invoke Lemma 6.2 of Belloni, et al. (2015) to get% \begin{equation} \sup_{\lambda_{K}\in B_{K}}\left\vert n^{-1}\sum_{i=1}^{n}\left[ \left\vert \lambda_{K}^{\prime}P(\varepsilon_{i})\right\vert ^{2}\right] -\mathbb{E}% \left[ \left\vert \lambda_{K}^{\prime}P(\varepsilon_{i})\right\vert ^{2}\right] \right\vert =O_{p}(n^{-1/2}\xi_{0,K}(\log K)^{1/2}), \label{P-AP-AL1-0}% \end{equation} which (together with Assumption \ref{L-A-3}.(i)) further implies that% \begin{equation} \left\Vert Q_{n,K}-Q_{K}\right\Vert =o_{p}(1) \label{P-AP-AL1-0A}% \end{equation} Under Assumptions \ref{L-A-0} and \ref{L-A-3}.(i), arguments in the proof of Theorem 4.1 in Belloni et al.\ (2015) show that% \begin{equation} \left\Vert \widehat{\gamma}_{n}-\gamma_{o,L}\right\Vert =O_{p}(\delta _{h,n}^{\ast}), \label{P-AP-AL1-1A}% \end{equation} which together with Assumptions \ref{L-A-0}.(iii)-(iv), and (\ref{P-L3-AP-8}) below (which is proved under Assumptions \ref{L-A-0} and \ref{L-A-3}.(i)) implies that \begin{align} n^{-1}\sum_{i=1}^{n}\left[ \left\vert \widehat{h}_{n}(x_{i})-h_{o}% (x_{i})\right\vert ^{2}\right] & \leq2n^{-1}\sum_{i=1}^{n}\left[ \left\vert \widehat{h}_{n}(x_{i})-h_{o,L}(x_{i})\right\vert ^{2}\right] +2n^{-1}\sum_{i=1}^{n}\left[ \left\vert h_{o,L}(x_{i})-h_{o}(x_{i}% )\right\vert ^{2}\right] \nonumber\\ & 
=2(\widehat{\gamma}_{n}-\gamma_{o,L})^{\prime}Q_{n,L}(\widehat{\gamma}% _{n}-\gamma_{o,L})+O(L^{-2\rho_{h}})\nonumber\\ & \leq2\omega_{\max}(Q_{n,L})\left\Vert \widehat{\gamma}_{n}-\gamma _{o,L}\right\Vert ^{2}+O(L^{-2\rho_{h}})=O_{p}(\delta_{h,n}^{\ast2}). \label{P-AP-AL1-1}% \end{align} Then by (\ref{P-AP-AL1-1}), and the definition of $\widehat{\varepsilon}_{i}$,% \begin{equation} n^{-1}\sum_{i=1}^{n}\left[ \left\vert \widehat{\varepsilon}_{i}% -\varepsilon_{i}\right\vert ^{2}\right] =n^{-1}\sum_{i=1}^{n}\left[ \left\vert \widehat{h}_{n}(x_{i})-h_{o}(x_{i})\right\vert ^{2}\right] =O_{p}(\delta_{h,n}^{\ast2}). \label{P-AP-AL1-2}% \end{equation} Using (\ref{P-AP-AL1-1A}), Assumptions \ref{L-A-0}.(iii), (v) and \ref{L-A-3}.(i), \ref{L-A-3}.(v), we have% \begin{align} \left\Vert \widehat{h}_{n}-h_{o}\right\Vert _{\infty} & \leq\left\Vert \widehat{h}_{n}-h_{o,L}\right\Vert _{\infty}+\left\Vert h_{o,L}-h_{o}% \right\Vert _{\infty}\nonumber\\ & =\left\Vert R(x)^{\prime}(\widehat{\gamma}_{n}-\gamma_{o,L})\right\Vert _{\infty}+O(L^{-\rho_{h}})\nonumber\\ & \leq\zeta_{L}\left\Vert \widehat{\gamma}_{n}-\gamma_{o,L}\right\Vert +O(L^{-\rho_{h}})=O_{p}(\zeta_{L}\delta_{h,n}^{\ast}), \label{P-AP-AL1-3}% \end{align} which implies that% \begin{equation} \max_{i\leq n}\left\vert \widehat{\varepsilon}_{i}-\varepsilon_{i}\right\vert =\max_{i\leq n}\left\vert \widehat{h}_{n}(x_{i})-h_{o}(x_{i})\right\vert \leq\left\Vert \widehat{h}_{n}-h_{o}\right\Vert _{\infty}=o_{p}(1). 
\label{P-AP-AL1-4}% \end{equation} For any $\lambda_{K}\in B_{K}$, by the mean value expansion, the triangle inequality and the Cauchy-Schwarz inequality,% \begin{align} \left\vert \left\vert \lambda_{K}^{\prime}P(\widehat{\varepsilon}% _{i})\right\vert ^{2}-\left\vert \lambda_{K}^{\prime}P(\varepsilon _{i})\right\vert ^{2}\right\vert & \leq\left\vert \lambda_{K}^{\prime }(P(\widehat{\varepsilon}_{i})-P(\varepsilon_{i}))\right\vert ^{2}+2\left\vert \lambda_{K}^{\prime}(P(\widehat{\varepsilon}_{i})-P(\varepsilon_{i}% ))\lambda_{K}^{\prime}P(\varepsilon_{i})\right\vert \nonumber\\ & =\left\vert \lambda_{K}^{\prime}\partial P(\widetilde{\varepsilon}% _{i})(\widehat{\varepsilon}_{i}-\varepsilon_{i})\right\vert ^{2}+2\left\vert \lambda_{K}^{\prime}\partial P(\widetilde{\varepsilon}_{i})\lambda_{K}% ^{\prime}P(\varepsilon_{i})(\widehat{\varepsilon}_{i}-\varepsilon _{i})\right\vert \nonumber\\ & \leq\left\Vert \partial P(\widetilde{\varepsilon}_{i})\right\Vert ^{2}\left\vert \widehat{\varepsilon}_{i}-\varepsilon_{i}\right\vert ^{2}+2\left\Vert \partial P(\widetilde{\varepsilon}_{i})\right\Vert \left\vert \lambda_{K}^{\prime}P(\varepsilon_{i})(\widehat{\varepsilon}_{i}% -\varepsilon_{i})\right\vert \label{P-AP-AL1-5}% \end{align} where\ $\widetilde{\varepsilon}_{i}$ is between $\widehat{\varepsilon}_{i}$ and $\varepsilon_{i}$ for each $\lambda_{K}\in R^{K}$. By (\ref{P-AP-AL1-2}), Assumption \ref{L-A-1}.(v) and $\widetilde{\varepsilon}_{i}\in\mathcal{E}% _{\eta}$ for all $i\leq n$ wpa1\ (which is implied by (\ref{P-AP-AL1-4})), \begin{equation} \frac{\max_{i\leq n}\left\Vert \partial P(\widetilde{\varepsilon}% _{i})\right\Vert ^{2}}{n}\sum_{i=1}^{n}\left\vert \widehat{\varepsilon}% _{i}-\varepsilon_{i}\right\vert ^{2}=O_{p}(\xi_{1,K}^{2}\delta_{h,n}^{\ast2}). 
\label{P-AP-AL1-6}% \end{equation} By the Cauchy-Schwarz inequality, \begin{align} & \sup_{\lambda_{K}\in B_{K}}\frac{\max_{i\leq n}\left\Vert \partial P(\widetilde{\varepsilon}_{i})\right\Vert }{n}\sum_{i=1}^{n}\left\vert \lambda_{K}^{\prime}P(\varepsilon_{i})(\widehat{\varepsilon}_{i}% -\varepsilon_{i})\right\vert \nonumber\\ & \leq\sup_{\lambda_{K}\in B_{K}}\max_{i\leq n}\left\Vert \partial P(\widetilde{\varepsilon}_{i})\right\Vert \left( n^{-1}\sum_{i=1}% ^{n}\left\vert \widehat{\varepsilon}_{i}-\varepsilon_{i}\right\vert ^{2}\right) ^{1/2}\left( n^{-1}\sum_{i=1}^{n}\left\vert \lambda_{K}^{\prime }P(\varepsilon_{i})\right\vert ^{2}\right) ^{1/2}\nonumber\\ & =O_{p}(\xi_{1,K}\delta_{h,n}^{\ast}), \label{P-AP-AL1-7}% \end{align} where the equality is by (\ref{P-AP-AL1-6}) and $\sup_{\lambda_{K}\in B_{K}% }n^{-1}\sum_{i=1}^{n}\left\vert \lambda_{K}^{\prime}P(\varepsilon _{i})\right\vert ^{2}=O_{p}(1)$ which is implied by (\ref{P-AP-AL1-0}), $\xi_{0,K}(\log K)^{1/2}n^{-1/2}=o(1)$ and $\sup_{\lambda_{K}\in B_{K}% }\mathbb{E}\left[ \left\vert \lambda_{K}^{\prime}P(\varepsilon)\right\vert ^{2}\right] \leq\omega_{\max}(Q_{K})\leq C$. By (\ref{P-AP-AL1-5}), (\ref{P-AP-AL1-6}) and (\ref{P-AP-AL1-7}), \begin{equation} \sup_{\lambda_{K}\in B_{K}}\left\vert n^{-1}\sum_{i=1}^{n}\left[ \left\vert \lambda_{K}^{\prime}P(\widehat{\varepsilon}_{i})\right\vert ^{2}\right] -n^{-1}\sum_{i=1}^{n}\left[ \left\vert \lambda_{K}^{\prime}P(\varepsilon _{i})\right\vert ^{2}\right] \right\vert =O_{p}(\xi_{1,K}^{2}\delta _{h,n}^{\ast2}+\xi_{1,K}\delta_{h,n}^{\ast}) \label{P-AP-AL1-8}% \end{equation} which together with (\ref{P-AP-AL1-0}) proves the claim of the Lemma. \end{proof} \bigskip \begin{lemma} \label{AP-AL-2} Suppose that Assumptions \ref{L-A-0}, \ref{L-A-1}, \ref{L-A-3}.(i)-(ii) and \ref{L-A-3}.(v)\ hold. 
Then we have% \[ \left\Vert \widehat{\beta}_{n}-\beta_{o,K}\right\Vert =O_{p}(K^{1/2}% n^{-1/2}+K^{-\rho_{g}}+\upsilon_{1,K}\delta_{h,n}^{\ast}), \] where $\upsilon_{1,K}=\sup_{\varepsilon\in\mathcal{E}_{\eta}}\left\vert \partial P(\varepsilon)^{\prime}\beta_{o,K}\right\vert $. \end{lemma} \begin{proof} [Proof of Lemma \ref{AP-AL-2}]Let $G_{n}=\left[ g_{o}(\varepsilon_{1}% ),\ldots,g_{o}(\varepsilon_{n})\right] ^{\prime}$, $\widehat{G}_{K,n}=\left[ g_{o,K}(\widehat{\varepsilon}_{1}),\ldots,g_{o,K}(\widehat{\varepsilon}% _{n})\right] ^{\prime}$ and $U_{n}=\left[ u_{1},\ldots,u_{n}\right] ^{\prime}$. By definition, \begin{equation} \widehat{\beta}_{n}=n^{-1}\widehat{Q}_{n,K}^{-1}\widehat{P}_{n}^{\prime}% (G_{n}+U_{n})=\beta_{o,K}+n^{-1}\widehat{Q}_{n,K}^{-1}\widehat{P}_{n}^{\prime }\left[ (G_{n}-G_{K,n})+(G_{K,n}-\widehat{G}_{K,n})+U_{n}\right] , \label{P-AP-AL2-1}% \end{equation} where $\widehat{Q}_{n,K}=n^{-1}\widehat{P}_{n}^{\prime}\widehat{P}_{n}$ and $G_{K,n}=\left[ g_{o,K}(\varepsilon_{1}),\ldots,g_{o,K}(\varepsilon _{n})\right] ^{\prime}$. 
By Assumptions \ref{L-A-3}.(ii) and \ref{L-A-3}.(v), $\xi_{1,K}\delta_{h,n}^{\ast}=o(1)$ which together with Assumption \ref{L-A-3}.(i) and Lemma \ref{AP-AL-1} implies that% \begin{equation} (2C)^{-1}<\omega_{\min}(\widehat{Q}_{n,K})\leq\omega_{\max}(\widehat{Q}% _{n,K})<2C\text{ wpa1.} \label{P-AP-AL2-2}% \end{equation} By (\ref{P-AP-AL2-2}) and Assumption \ref{L-A-1}.(iii),% \begin{align} & n^{-2}(G_{n}-G_{K,n})^{\prime}\widehat{P}_{n}\widehat{Q}_{n,K}^{-2}% \widehat{P}_{n}^{\prime}(G_{n}-G_{K,n})\nonumber\\ & \leq\omega_{\min}^{-1}(\widehat{Q}_{n,K})n^{-2}(G_{n}-G_{K,n})^{\prime }\widehat{P}_{n}\widehat{Q}_{n,K}^{-1}\widehat{P}_{n}^{\prime}(G_{n}% -G_{K,n})\nonumber\\ & =\omega_{\min}^{-1}(\widehat{Q}_{n,K})n^{-1}(G_{n}-G_{K,n})^{\prime }\widehat{P}_{n}(\widehat{P}_{n}^{\prime}\widehat{P}_{n})^{-1}\widehat{P}% _{n}^{\prime}(G_{n}-G_{K,n})\nonumber\\ & \leq O_{p}(1)n^{-1}\sum_{i=1}^{n}\left[ \left\vert g_{o}(\varepsilon _{i})-g_{o,K}(\varepsilon_{i})\right\vert ^{2}\right] =O_{p}(K^{-2\rho_{g}}), \label{P-AP-AL2-3}% \end{align} where the first equality is by the definition of $\widehat{Q}_{n,K}$, the second inequality is by the fact that $\widehat{P}_{n}(\widehat{P}_{n}% ^{\prime}\widehat{P}_{n})^{-1}\widehat{P}_{n}^{\prime}$ is an idempotent matrix. Similarly% \begin{align} & n^{-2}(G_{K,n}-\widehat{G}_{K,n})^{\prime}\widehat{P}_{n}\widehat{Q}% _{n,K}^{-2}\widehat{P}_{n}^{\prime}(G_{K,n}-\widehat{G}_{K,n})\nonumber\\ & \leq O_{p}(1)n^{-1}(G_{K,n}-\widehat{G}_{K,n})^{\prime}\widehat{P}% _{n}(\widehat{P}_{n}^{\prime}\widehat{P}_{n})^{-1}\widehat{P}_{n}^{\prime }(G_{K,n}-\widehat{G}_{K,n})\nonumber\\ & \leq O_{p}(1)n^{-1}\sum_{i=1}^{n}\left[ \left\vert g_{o,K}(\varepsilon _{i})-g_{o,K}(\widehat{\varepsilon}_{i})\right\vert ^{2}\right] . 
\label{P-AP-AL2-4}% \end{align} By the mean value expansion and the Cauchy-Schwarz inequality,% \begin{equation} \left\vert g_{o,K}(\varepsilon_{i})-g_{o,K}(\widehat{\varepsilon}% _{i})\right\vert =\left\vert \partial P(\widetilde{\varepsilon}_{i})^{\prime }\beta_{o,K}(\widehat{\varepsilon}_{i}-\varepsilon_{i})\right\vert \leq \max_{i\leq n}\left\vert \partial P(\widetilde{\varepsilon}_{i})^{\prime}% \beta_{o,K}\right\vert \left\vert \widehat{\varepsilon}_{i}-\varepsilon _{i}\right\vert , \label{P-AP-AL2-5}% \end{equation} where $\widetilde{\varepsilon}_{i}$ is between $\varepsilon_{i}$ and $\widehat{\varepsilon}_{i}$. Using (\ref{P-AP-AL2-5}), we get% \begin{equation} n^{-1}\sum_{i=1}^{n}\left[ \left\vert g_{o,K}(\varepsilon_{i})-g_{o,K}% (\widehat{\varepsilon}_{i})\right\vert ^{2}\right] \leq\max_{i\leq n}\left\vert \partial P(\widetilde{\varepsilon}_{i})^{\prime}\beta _{o,K}\right\vert ^{2}n^{-1}\sum_{i=1}^{n}\left[ \left\vert \widehat {\varepsilon}_{i}-\varepsilon_{i}\right\vert ^{2}\right] =O_{p}% (\upsilon_{1,K}^{2}\delta_{h,n}^{\ast2}), \label{P-AP-AL2-6}% \end{equation} where the equality is by (\ref{P-AP-AL1-2}) and $\max_{i\leq n}\left\vert \partial P(\widetilde{\varepsilon}_{i})^{\prime}\beta_{o,K}\right\vert ^{2}=O_{p}(\upsilon_{1,K}^{2})$ which is implied by the definition of $\upsilon_{1,K}$ and $\widetilde{\varepsilon}_{i}\in\mathcal{E}_{\eta}$ for all $i\leq n$ wpa1 (which is implied by (\ref{P-AP-AL1-4})). Combining the results in (\ref{P-AP-AL2-4}) and (\ref{P-AP-AL2-6}), we get% \begin{equation} n^{-2}(G_{K,n}-\widehat{G}_{K,n})^{\prime}\widehat{P}_{n}\widehat{Q}% _{n,K}^{-2}\widehat{P}_{n}^{\prime}(G_{K,n}-\widehat{G}_{K,n})=O_{p}% (\upsilon_{1,K}^{2}\delta_{h,n}^{\ast2}). \label{P-AP-AL2-7}% \end{equation} By Assumptions \ref{L-A-0}.(i) and \ref{L-A-1}.(i) \begin{align} & \mathbb{E}\left[ \left. 
n^{-2}U_{n}^{\prime}\widehat{P}_{n}\widehat {Q}_{n,K}^{-1}\widehat{P}_{n}^{\prime}U_{n}\right\vert \{x_{i},s_{i}% \}_{i=1}^{n}\right] \nonumber\\ & =\operatorname{tr}\left( n^{-2}\widehat{P}_{n}\widehat{Q}_{n,K}^{-1}\widehat{P}% _{n}^{\prime}\mathbb{E}\left[ \left. U_{n}U_{n}^{\prime}\right\vert \{x_{i},s_{i}\}_{i=1}^{n}\right] \right) \nonumber\\ & \leq\frac{C}{n}\operatorname{tr}\left( \widehat{Q}_{n,K}^{-1}\widehat{P}_{n}^{\prime }\widehat{P}_{n}/n\right) =O(Kn^{-1}), \label{P-AP-AL2-8}% \end{align} which together with (\ref{P-AP-AL2-2}) and the Markov inequality implies that% \begin{equation} n^{-2}U_{n}^{\prime}\widehat{P}_{n}\widehat{Q}_{n,K}^{-2}\widehat{P}% _{n}^{\prime}U_{n}=O_{p}(Kn^{-1}). \label{P-AP-AL2-9}% \end{equation} Collecting the results in (\ref{P-AP-AL2-1}), (\ref{P-AP-AL2-3}), (\ref{P-AP-AL2-7}) and (\ref{P-AP-AL2-9}), we prove the claim of the lemma. \end{proof} \bigskip \begin{lemma} \label{L-AP-1} Suppose that Assumptions \ref{L-A-0}, \ref{L-A-1}, \ref{L-A-2} and \ref{L-A-3} hold. Then Assumption 3.1 in HLR holds. \end{lemma} \begin{proof} [Proof of Lemma \ref{L-AP-1}]By the definition of $\left\Vert v_{n}^{\ast }\right\Vert _{sd}^{2}$, Assumptions \ref{L-A-1}.(i) and \ref{L-A-2}.(i), \begin{equation} \left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2}=\left\Vert v_{\Gamma_{n}}^{\ast }(x)\varepsilon\right\Vert _{2}^{2}+\left\Vert v_{g_{n}}^{\ast}(\varepsilon )u\right\Vert _{2}^{2}\geq\left\Vert v_{g_{n}}^{\ast}(\varepsilon)u\right\Vert _{2}^{2}\geq C^{-1}\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}^{2}\geq C^{-1} \label{P-LAP-1}% \end{equation} for all $n$, which verifies Assumption 3.1.(i) in HLR. Assumption 3.1.(ii) in HLR is directly assumed in Assumption \ref{L-A-2}.(ii). By Lemma \ref{AP-AL-2}, we know that $\delta_{2,n}^{\ast}=n^{-1/2}K^{1/2}+K^{-\rho_{g}% }+\upsilon_{1,K}\delta_{h,n}^{\ast}$, where $\upsilon_{1,K}=\sup _{\varepsilon\in\mathcal{E}_{\eta}}\left\vert \partial P(\varepsilon)^{\prime }\beta_{o,K}\right\vert $. 
Let $g_{n}=g_{o,K}$, then by Assumption \ref{L-A-1}.(iii), we have $\left\Vert g_{n}-g_{o}\right\Vert _{2}% =O(K^{-\rho_{g}})=O(\delta_{2,n}^{\ast})$. By the definitions of $\left\Vert \cdot\right\Vert _{\varphi}$ and $\left\Vert \cdot\right\Vert _{\psi}$,\ we can set $c_{\varphi}=1$ and $c_{\psi}=1$ such that $\left\Vert v_{h}% \right\Vert _{\varphi}\leq c_{\varphi}\left\Vert v_{h}\right\Vert _{\mathcal{H}}$ and $\left\Vert v_{g}\right\Vert _{\psi}\leq c_{\psi }\left\Vert v_{g}\right\Vert _{\mathcal{G}}$ for any $v_{h}\in\mathcal{V}_{1}$ and $v_{g}\in\mathcal{V}_{2}$.\ This verifies Assumption 3.1.(iii) in HLR. Assumption 3.1.(iv) in HLR is assumed in Assumptions \ref{L-A-2}.(iii). \end{proof} \bigskip \begin{lemma} \label{L-AP-2} Suppose that Assumptions \ref{L-A-0}, \ref{L-A-1}, \ref{L-A-2} and \ref{L-A-3} hold. Then Assumption 3.2 in HLR holds. \end{lemma} \begin{proof} [Proof of Lemma \ref{L-AP-2}]For ease of notation, we define $\varepsilon _{h}=s-h(x)$. By definition,% \begin{align} & \psi(Z_{2},g^{\ast},h)-\psi(Z_{2},g,h)-\Delta_{\psi}(Z_{2},g,h)[\pm \kappa_{n}u_{g_{n}}^{\ast}]\nonumber\\ & =-\frac{1}{2}\left[ \left\vert y-g(\varepsilon_{h})\mp\kappa_{n}u_{g_{n}% }^{\ast}(\varepsilon)\right\vert ^{2}\right] +\frac{1}{2}\left[ \left\vert y-g(\varepsilon_{h})\right\vert ^{2}\right] -\left[ y-g(\varepsilon _{h})\right] (\pm\kappa_{n}u_{g_{n}}^{\ast})\nonumber\\ & =-\frac{1}{2}\kappa_{n}^{2}(u_{g_{n}}^{\ast}(\varepsilon))^{2}. 
\label{P-L2-AP-1}% \end{align} By Assumption \ref{L-A-1}.(i), \begin{equation} \mathbb{E}\left[ (u_{g_{n}}^{\ast}(\varepsilon))^{2}\right] =\frac {\mathbb{E}\left[ |v_{g_{n}}^{\ast}(\varepsilon)|^{2}\right] }{\left\Vert v_{\Gamma_{n}}^{\ast}(x)\varepsilon\right\Vert _{2}^{2}+\left\Vert v_{g_{n}% }^{\ast}(\varepsilon)u\right\Vert _{2}^{2}}\leq\frac{\mathbb{E}\left[ |v_{g_{n}}^{\ast}(\varepsilon)|^{2}\right] }{\left\Vert v_{\Gamma_{n}}% ^{\ast}(x)\varepsilon\right\Vert _{2}^{2}+C^{-1}\left\Vert v_{g_{n}}^{\ast }(\varepsilon)\right\Vert _{2}^{2}}\leq C, \label{P-L2-AP-2}% \end{equation} which together with the Markov inequality, Assumption \ref{L-A-0}.(i) and (\ref{P-L2-AP-1}) verifies Assumption 3.2.(i) in HLR. By definition, \begin{equation} \Delta_{\psi}(Z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},g_{o}% ,h_{o})[u_{g_{n}}^{\ast}]=\left( g_{o}(\varepsilon)-g(\varepsilon _{h})\right) u_{g_{n}}^{\ast}. \label{P-L2-AP-3}% \end{equation} Recall that $\mathcal{N}_{h,n}=\{h\in\mathcal{H}_{n}:\left\Vert h-h_{o}% \right\Vert _{2}\leq\delta_{1,n}\}$, where $\delta_{1,n}=\delta_{h,n}^{\ast }\log(\log(n))$. 
It is clear that for any $h(\cdot)=R(\cdot)^{\prime}% \gamma_{L}\in\mathcal{N}_{h,n}$, we have% \begin{align} \left\Vert h-h_{o}\right\Vert _{\infty} & \leq\left\Vert h-h_{o,L}% \right\Vert _{\infty}+\left\Vert h_{o,L}-h_{o}\right\Vert _{\infty}\nonumber\\ & \leq\left\Vert R(x)^{\prime}(\gamma_{L}-\gamma_{o,L})\right\Vert _{\infty }+CL^{-\rho_{h}}\nonumber\\ & \leq\zeta_{L}\left\Vert \gamma_{L}-\gamma_{o,L}\right\Vert +CL^{-\rho_{h}% }\nonumber\\ & \leq\zeta_{L}\omega_{\min}^{-1/2}(Q_{L})((\gamma_{L}-\gamma_{o,L})^{\prime }Q_{L}(\gamma_{L}-\gamma_{o,L}))^{1/2}+CL^{-\rho_{h}}\nonumber\\ & =\zeta_{L}\omega_{\min}^{-1/2}(Q_{L})\left\Vert h-h_{o,L}\right\Vert _{2}+CL^{-\rho_{h}}\nonumber\\ & \leq\zeta_{L}\omega_{\min}^{-1/2}(Q_{L})\left[ \left\Vert h-h_{o}% \right\Vert _{2}+\left\Vert h_{o,L}-h_{o}\right\Vert _{2}\right] +CL^{-\rho_{h}}\leq C\zeta_{L}\delta_{1,n}, \label{P-L2-AP-3A}% \end{align} where the last inequality is by Assumption \ref{L-A-0}.(iii)-(iv) and the definition of $\delta_{1,n}$. Define% \[ \mathcal{F}_{n}=\left\{ f(s,x,h,g):f(s,x,h,g)=\left( g_{o}(\varepsilon )-g(\varepsilon_{h})\right) u_{g_{n}}^{\ast}(\varepsilon)\text{, }% g\in\mathcal{N}_{g,n}\text{, }h\in\mathcal{N}_{h,n}\right\} , \] where $\mathcal{N}_{g,n}=\{g\in\mathcal{G}_{n}:\left\Vert g-g_{o}\right\Vert _{2}\leq\delta_{2,n}\}$ and $\delta_{2,n}=\delta_{2,n}^{\ast}\log(\log(n))$. By Assumptions \ref{L-A-3}.(i) and \ref{L-A-3}.(v), $\zeta_{L}\delta _{1,n}=o(1)$. Hence by (\ref{P-L2-AP-3A}) we can take $n$ sufficiently large such that $\zeta_{L}\delta_{1,n}<\eta/2$ and $\varepsilon_{h}\in \mathcal{E}_{\eta}$ for any $h\in\mathcal{N}_{h,n}$. By the mean value expansion, $g(\varepsilon_{h})-g(\varepsilon)=\partial P(\widetilde {\varepsilon}_{h})^{\prime}\beta(\varepsilon_{h}-\varepsilon)$ where $\widetilde{\varepsilon}_{h}$ is between $\varepsilon_{h}$ and $\varepsilon$. 
As $\varepsilon_{h}\in\mathcal{E}_{\eta}$ for any $h\in\mathcal{N}_{h,n}$, we have $\widetilde{\varepsilon}_{h}\in\mathcal{E}_{\eta}$. Hence for any $g\left( \cdot\right) =P(\cdot)^{\prime}\beta$ with $g\left( \cdot\right) \in\mathcal{N}_{g,n}$ and any $h\in\mathcal{N}_{h,n}$, we have% \begin{align} \left\vert g(\varepsilon_{h})-g(\varepsilon)\right\vert & \leq\left\vert \partial P(\widetilde{\varepsilon}_{h})^{\prime}(\beta-\beta_{o,K}% )(\varepsilon_{h}-\varepsilon)\right\vert +\left\vert \partial P(\widetilde {\varepsilon}_{h})^{\prime}\beta_{o,K}(\varepsilon_{h}-\varepsilon)\right\vert \nonumber\\ & =\left\vert \partial P(\widetilde{\varepsilon}_{h})^{\prime}(\beta -\beta_{o,K})(h(x)-h_{o}(x))\right\vert +\left\vert \partial P(\widetilde {\varepsilon}_{h})^{\prime}\beta_{o,K}(h(x)-h_{o}(x))\right\vert \nonumber\\ & \leq\left[ \left\Vert \partial P(\widetilde{\varepsilon}_{h})\right\Vert \left\Vert \beta-\beta_{o,K}\right\Vert +\left\vert \partial P(\widetilde {\varepsilon}_{h})^{\prime}\beta_{o,K}\right\vert \right] \left\Vert h-h_{o}\right\Vert _{\infty}\nonumber\\ & \leq(\xi_{1,K}\delta_{2,n}+\upsilon_{1,K})\zeta_{L}\delta_{1,n}\leq (\xi_{1,K}\delta_{1,n}+1)\zeta_{L}\delta_{2,n}\leq C\zeta_{L}\delta_{2,n}, \label{P-L2-AP-4}% \end{align} where the first inequality is by the mean value expansion and the triangle inequality, the equality is by the definitions of $\varepsilon_{h}$ and $\varepsilon$, the second inequality is by the Cauchy-Schwarz inequality, the third inequality is by Assumption \ref{L-A-1}.(v), (\ref{P-L2-AP-3A}), the definitions of $\upsilon_{1,K}$ and $\mathcal{N}_{h,n}$, and \begin{equation} \left\Vert \beta-\beta_{o,K}\right\Vert \leq\omega_{\min}^{-1/2}(Q_{K})\left( \left\Vert g-g_{o}\right\Vert _{2}+\left\Vert g_{o}-g_{o,K}\right\Vert _{2}\right) \leq C\delta_{2,n} \label{P-L2-AP-4A}% \end{equation} which is implied by Assumption \ref{L-A-1}.(iii) and the definition of $\mathcal{N}_{g,n}$, the fourth inequality is because $\upsilon_{1,K}% 
\delta_{1,n}\leq\delta_{2,n}$ by definition, the last inequality in (\ref{P-L2-AP-4}) is by $\xi_{1,K}\delta_{1,n}=O(1)$ which is implied by Assumptions \ref{L-A-3}.(ii) and \ref{L-A-3}.(v). By the triangle inequality and the Cauchy-Schwarz inequality, \begin{equation} \left\vert g(\varepsilon)-g_{o}(\varepsilon)\right\vert \leq\left\Vert \beta-\beta_{o,K}\right\Vert \xi_{0,K}+\left\Vert g_{o}-g_{o,K}\right\Vert _{\infty}\leq C\xi_{0,K}\delta_{2,n} \label{P-L2-AP-5}% \end{equation} where the last inequality is by Assumption \ref{L-A-1}.(iii) and (\ref{P-L2-AP-4A}). By the definition of $u_{g_{n}}^{\ast}$, Assumptions \ref{L-A-1}.(iv)-(v) and (\ref{P-LAP-1}), \begin{equation} \sup_{\varepsilon\in\mathcal{E}}\left\vert u_{g_{n}}^{\ast}(\varepsilon )\right\vert ^{2}\leq\frac{\xi_{0,K}^{2}\partial\rho(g_{o})\left[ P\right] ^{\prime}Q_{K}^{-2}\partial\rho(g_{o})\left[ P\right] }{C^{-1}\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}^{2}}=\frac{C\xi_{0,K}^{2}\partial\rho (g_{o})\left[ P\right] ^{\prime}Q_{K}^{-2}\partial\rho(g_{o})\left[ P\right] }{\partial\rho(g_{o})[P]^{\prime}Q_{K}^{-1}\partial\rho (g_{o})\left[ P\right] }\leq C\xi_{0,K}^{2}. \label{P-L2-AP-5B}% \end{equation} Combining the results in (\ref{P-L2-AP-4}), (\ref{P-L2-AP-5}) and (\ref{P-L2-AP-5B}), we get \begin{align} \sup_{f\in\mathcal{F}_{n}}\left\Vert f\right\Vert _{\infty} & \leq\sup _{g\in\mathcal{N}_{g,n}\text{, }h\in\mathcal{N}_{h,n}\text{, }\varepsilon \in\mathcal{E}}\left[ \left\vert g(\varepsilon_{h})-g(\varepsilon)\right\vert +\left\vert g(\varepsilon)-g_{o}(\varepsilon)\right\vert \right] \sup_{\varepsilon\in\mathcal{E}}\left\vert u_{g_{n}}^{\ast}(\varepsilon )\right\vert \nonumber\\ & \leq C(\zeta_{L}+\xi_{0,K})\xi_{0,K}\delta_{2,n}\equiv M_{n}. 
\label{P-L2-AP-6}% \end{align} For any $f\in\mathcal{F}_{n}$, by (\ref{P-L2-AP-4}) and (\ref{P-L2-AP-5}), \begin{align} \mathbb{E}\left[ f^{2}\right] & \leq2E\left[ \left( g(\varepsilon _{h})-g(\varepsilon)\right) ^{2}(u_{g_{n}}^{\ast}(\varepsilon))^{2}\right] +2E\left[ \left( g(\varepsilon)-g_{o}(\varepsilon)\right) ^{2}(u_{g_{n}% }^{\ast}(\varepsilon))^{2}\right] \nonumber\\ & \leq C(\zeta_{L}^{2}+\xi_{0,K}^{2})\delta_{2,n}^{2}\mathbb{E}\left[ (u_{g_{n}}^{\ast}(\varepsilon))^{2}\right] \leq C(\zeta_{L}^{2}+\xi_{0,K}% ^{2})\delta_{2,n}^{2}\equiv d_{n}^{2} \label{P-L2-AP-7}% \end{align} where the last inequality is by (\ref{P-L2-AP-2}). For any $f_{1}% =f(\cdot,h_{1},g_{1})$ and any $f_{2}=f(\cdot,h_{2},g_{2})$ where $h_{1}% ,h_{2}\in\mathcal{N}_{h,n}$ and $g_{1},g_{2}\in\mathcal{N}_{g,n}$, by the triangle inequality, \begin{align} \left\vert f_{1}-f_{2}\right\vert & \leq\left\vert \left( g_{1}% (\varepsilon_{h_{1}})-g_{1}(\varepsilon_{h_{2}})\right) u_{g_{n}}^{\ast }(\varepsilon)\right\vert +\left\vert \left( g_{1}(\varepsilon_{h_{2}}% )-g_{2}(\varepsilon_{h_{2}})\right) u_{g_{n}}^{\ast}(\varepsilon)\right\vert \nonumber\\ & \leq\left\vert \partial P(\widetilde{\varepsilon}_{h})^{\prime}\beta _{1}(\varepsilon_{h_{1}}-\varepsilon_{h_{2}})u_{g_{n}}^{\ast}(\varepsilon )\right\vert +\xi_{0,K}\left\vert u_{g_{n}}^{\ast}(\varepsilon)\right\vert \left\Vert \beta_{1}-\beta_{2}\right\Vert \nonumber\\ & =\left\vert \partial P(\widetilde{\varepsilon}_{h})^{\prime}\left[ (\beta_{1}-\beta_{o,K})+\beta_{o,K}\right] (h_{1}(x)-h_{2}(x))u_{g_{n}}% ^{\ast}(\varepsilon)\right\vert +\xi_{0,K}\left\vert u_{g_{n}}^{\ast }(\varepsilon)\right\vert \left\Vert \beta_{1}-\beta_{2}\right\Vert \nonumber\\ & \leq\left[ \left\Vert \partial P(\widetilde{\varepsilon}_{h})\right\Vert \left\Vert \beta_{1}-\beta_{o,K}\right\Vert +\left\vert \partial P(\widetilde{\varepsilon}_{h})^{\prime}\beta_{o,K}\right\vert \right] \left\vert R(x)^{\prime}(\gamma_{1}-\gamma_{2})\right\vert \left\vert 
u_{g_{n}}^{\ast}(\varepsilon)\right\vert +\xi_{0,K}\left\vert u_{g_{n}}^{\ast }(\varepsilon)\right\vert \left\Vert \beta_{1}-\beta_{2}\right\Vert \nonumber\\ & \leq\left[ \xi_{1,K}\delta_{2,n}+\upsilon_{1,K}\right] \zeta _{L}\left\vert u_{g_{n}}^{\ast}(\varepsilon)\right\vert \left\Vert \gamma _{1}-\gamma_{2}\right\Vert +\xi_{0,K}\left\vert u_{g_{n}}^{\ast}% (\varepsilon)\right\vert \left\Vert \beta_{1}-\beta_{2}\right\Vert \nonumber\\ & \leq F_{n}(\varepsilon)(\left\Vert \beta_{1}-\beta_{2}\right\Vert +\left\Vert \gamma_{1}-\gamma_{2}\right\Vert ), \label{P-L2-AP-8}% \end{align} where $F_{n}(\varepsilon)=C(\xi_{1,K}\zeta_{L}\delta_{2,n}+\upsilon_{1,K}% \zeta_{L}+\xi_{0,K})\left\vert u_{g_{n}}^{\ast}(\varepsilon)\right\vert $, the equality is by the definitions of $\varepsilon_{h_{1}}$ and $\varepsilon _{h_{2}}$, the fourth inequality is by $\left\Vert \partial P(\widetilde {\varepsilon}_{h})\right\Vert \leq\xi_{1,K}$ and $\left\Vert \beta_{1}% -\beta_{o,K}\right\Vert \leq\delta_{2,n}$ for any $h_{1},h_{2}\in \mathcal{N}_{h,n}$, and \begin{equation} \left\vert R(x)^{\prime}(\gamma_{1}-\gamma_{2})\right\vert \leq\left\Vert R(x)\right\Vert \left\Vert \gamma_{1}-\gamma_{2}\right\Vert \leq\zeta _{L}\left\Vert \gamma_{1}-\gamma_{2}\right\Vert \label{P-L2-AP-8A}% \end{equation} which is implied by the Cauchy-Schwarz inequality and the definition of $\zeta_{L}$. By (\ref{P-L2-AP-2}), $\left\Vert F_{n}\right\Vert _{2}\leq C(\xi_{1,K}% \zeta_{L}\delta_{2,n}+\upsilon_{1,K}\zeta_{L}+\xi_{0,K})\equiv\xi_{F_{n}}$. Let $H_{[]}\left( u,\mathcal{F}_{n},\left\Vert \cdot\right\Vert _{2}\right) $ denote the $u$-bracketing entropy number of the function space $\mathcal{F}_{n}$ under the $L_{2}$-norm. By Example 19.7 in Van der Vaart (1998), $H_{[]}\left( u\left\Vert F_{n}\right\Vert _{2},\mathcal{F}% _{n},\left\Vert \cdot\right\Vert _{2}\right) \leq(Cu^{-1})^{L+K}$ for all $u\in(0,1)$. 
Hence% \begin{equation} J_{[]}\left( d_{n},\mathcal{F}_{n},\left\Vert \cdot\right\Vert _{2}\right) =\int_{0}^{d_{n}}(\log H_{[]}\left( u,\mathcal{F}_{n},\left\Vert \cdot\right\Vert _{2}\right) )^{1/2}du\leq C(K+L)^{1/2}(\log(n))^{1/2}d_{n}, \label{P-L2-AP-9}% \end{equation} where the inequality is by $d_{n}^{-1}\leq Cn$ and $\xi_{F_{n}}\leq Cn$ which are implied by Assumption \ref{L-A-3}. By (\ref{P-L2-AP-6}), (\ref{P-L2-AP-7}% ), (\ref{P-L2-AP-9}) and Lemma 19.36 in Van der Vaart (1998),% \begin{align} & \mathbb{E}\left[ \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \Delta_{\psi}(Z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},g_{o}% ,h_{o})[u_{g_{n}}^{\ast}]\right\} \right\vert \right] \nonumber\\ & \leq\frac{J_{[]}\left( d_{n},\mathcal{F}_{n},\left\Vert \cdot\right\Vert _{2}\right) }{n^{1/2}}\left( 1+\frac{J_{[]}\left( d_{n},\mathcal{F}% _{n},\left\Vert \cdot\right\Vert _{2}\right) }{d_{n}^{2}n^{1/2}}M_{n}\right) \nonumber\\ & \leq C\frac{(K+L)^{1/2}(\log(n))^{1/2}}{n^{1/2}}d_{n}\left( 1+\frac {(K+L)^{1/2}(\log(n))^{1/2}}{d_{n}n^{1/2}}M_{n}\right) \nonumber\\ & \leq C\frac{(K+L)^{1/2}(\log(n))^{1/2}d_{n}}{n^{1/2}}\left( 1+\frac {(K+L)^{1/2}\xi_{0,K}(\log(n))^{1/2}}{n^{1/2}}\right) =o(n^{-1/2}), \label{P-L2-AP-10}% \end{align} where the equality is by Assumptions \ref{L-A-3}.(i) and \ref{L-A-3}.(v). Using (\ref{P-L2-AP-10}) and the Markov inequality, we get \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \Delta_{\psi}% (Z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},g_{o},h_{o})[u_{g_{n}}% ^{\ast}]\right\} \right\vert =o_{p}(n^{-1/2}), \label{P-L2-AP-11}% \end{equation} which verifies Assumption 3.2.(ii) in HLR. By Assumption \ref{L-A-1}.(i), (\ref{P-L2-AP-2}) and $\mathbb{E}\left[ \left. 
u\right\vert \varepsilon\right] =0$,% \begin{align} K_{\psi}(g,h)-K_{\psi}(g^{\ast},h) & =\mathbb{E}\left[ -\frac{1}% {2}\left\vert y-g(\varepsilon_{h})\right\vert ^{2}\right] -\mathbb{E}\left[ -\frac{1}{2}\left\vert y-g^{\ast}(\varepsilon_{h})\right\vert ^{2}\right] \nonumber\\ & =\mathbb{E}\left[ -\frac{1}{2}\left\vert y-g(\varepsilon_{h})\right\vert ^{2}\right] -\mathbb{E}\left[ -\frac{1}{2}\left\vert y-g(\varepsilon_{h}% )\mp\kappa_{n}u_{g_{n}}^{\ast}(\varepsilon)\right\vert ^{2}\right] \nonumber\\ & =\mathbb{E}\left[ \kappa_{n}^{2}\frac{(u_{g_{n}}^{\ast}(\varepsilon))^{2}% }{2}\mp\kappa_{n}u_{g_{n}}^{\ast}(\varepsilon)u\pm\kappa_{n}u_{g_{n}}^{\ast }(\varepsilon)(g(\varepsilon_{h})-g_{o}(\varepsilon))\right] \nonumber\\ & =\pm\kappa_{n}\mathbb{E}\left[ u_{g_{n}}^{\ast}(\varepsilon)(g(\varepsilon _{h})-g_{o}(\varepsilon))\right] +O(\kappa_{n}^{2}). \label{P-L2-AP-12}% \end{align} By the second-order expansion, $g(\varepsilon_{h})-g(\varepsilon)=\partial g(\varepsilon)(\varepsilon_{h}-\varepsilon)+\frac{1}{2}\partial^{2}g(\widetilde {\varepsilon}_{h})(\varepsilon_{h}-\varepsilon)^{2}$, where $\widetilde {\varepsilon}_{h}\in\mathcal{E}_{\eta}$ for any $h\in\mathcal{N}_{h,n}$. For any $g\left( \cdot\right) =P\left( \cdot\right) ^{\prime}\beta \in\mathcal{N}_{g}$, we have% \begin{equation} \left\Vert \beta\right\Vert \leq\left\Vert \beta-\beta_{o,K}\right\Vert +\left\Vert \beta_{o,K}\right\Vert \leq C\delta_{2,n}+\left\Vert \beta _{o,K}\right\Vert \leq C \label{P-L2-AP-12A}% \end{equation} where the second inequality is by (\ref{P-L2-AP-4A}), Assumptions \ref{L-A-3}% .(i)-(ii) and \ref{L-A-3}.(v), the third inequality is by \begin{equation} \left\Vert \beta_{o,K}\right\Vert \leq\omega_{\min}^{-1}(Q_{K})\left\Vert g_{o,K}\right\Vert _{2}\leq\omega_{\min}^{-1}(Q_{K})\left[ \left\Vert g_{o,K}-g_{o}\right\Vert _{2}+\left\Vert g_{o}\right\Vert _{2}\right] \leq C \label{P-L2-AP-12B}% \end{equation} where the third inequality is by Assumptions \ref{L-A-1}.(ii)-(iv).
Note that for any $g(\cdot)=P(\cdot)^{\prime}\beta\in\mathcal{N}_{g,n}$, we have $\left\Vert g-g_{o}\right\Vert _{2}\leq\delta_{2,n}$, which together with Assumption \ref{L-A-1}.(iii) and the definition of $\delta_{2,n}$ implies that \begin{equation} \left\Vert g-g_{o,K}\right\Vert _{2}\leq\left\Vert g-g_{o}\right\Vert _{2}+\left\Vert g_{o,K}-g_{o}\right\Vert _{2}\leq2\delta_{2,n}. \label{P-L2-AP-12C}% \end{equation} By (\ref{P-L2-AP-12C}) and Assumption \ref{L-A-1}.(iv),% \begin{equation} \left\Vert \beta-\beta_{o,K}\right\Vert ^{2}\leq\omega_{\min}^{-1}% (Q_{K})(\beta-\beta_{o,K})^{\prime}Q_{K}(\beta-\beta_{o,K})=\omega_{\min}% ^{-1}(Q_{K})\left\Vert g-g_{o,K}\right\Vert _{2}^{2}\leq C\delta_{2,n}^{2}. \label{P-L2-AP-12D}% \end{equation} By (\ref{P-L2-AP-12A}), $\left\vert \partial^{2}g(\widetilde{\varepsilon}% _{h})\right\vert \leq C\xi_{2,K}$, which together with (\ref{P-L2-AP-2}), (\ref{P-AP-AL1-1}), (\ref{P-AP-AL1-3}) and (\ref{P-L2-AP-12D}) implies that% \begin{align} \mathbb{E}\left[ \left\vert \partial^{2}g(\widetilde{\varepsilon}% _{h})(\varepsilon_{h}-\varepsilon)^{2}u_{g_{n}}^{\ast}(\varepsilon)\right\vert \right] & \leq\mathbb{E}\left[ \left\vert \partial^{2}P(\widetilde {\varepsilon}_{h})^{\prime}(\beta-\beta_{o,K})(\varepsilon_{h}-\varepsilon )^{2}u_{g_{n}}^{\ast}(\varepsilon)\right\vert \right] \nonumber\\ & +\mathbb{E}\left[ \left\vert \partial^{2}P(\widetilde{\varepsilon}% _{h})^{\prime}\beta_{o,K}(\varepsilon_{h}-\varepsilon)^{2}u_{g_{n}}^{\ast }(\varepsilon)\right\vert \right] \nonumber\\ & \leq\left( \xi_{2,K}\left\Vert \beta-\beta_{o,K}\right\Vert +\upsilon _{2,K}\right) \mathbb{E}\left[ \left\vert (\varepsilon_{h}-\varepsilon )^{2}u_{g_{n}}^{\ast}(\varepsilon)\right\vert \right] \nonumber\\ & \leq\left( \xi_{2,K}\delta_{2,n}+\upsilon_{2,K}\right) \zeta_{L}% \delta_{1,n}\mathbb{E}\left[ \left\vert (\varepsilon_{h}-\varepsilon )u_{g_{n}}^{\ast}(\varepsilon)\right\vert \right] \nonumber\\ & \leq\left( \xi_{2,K}\delta_{2,n}+\upsilon_{2,K}\right)
\zeta_{L}% \delta_{1,n}^{2}=o(n^{-1/2}) \label{P-L2-AP-13}% \end{align} for any $g\in\mathcal{N}_{g,n}$ and $h\in\mathcal{N}_{h,n}$, where the equality is by Assumptions \ref{L-A-3}.(iii)-(v). By (\ref{P-L2-AP-13}), \begin{equation} \mathbb{E}\left[ u_{g_{n}}^{\ast}(\varepsilon)(g(\varepsilon_{h}% )-g(\varepsilon))\right] =\mathbb{E}\left[ u_{g_{n}}^{\ast }(\varepsilon)\partial g(\varepsilon)(\varepsilon_{h}-\varepsilon)\right] +o(n^{-1/2}). \label{P-L2-AP-14}% \end{equation} By Jensen's inequality, the H\"{o}lder inequality, (\ref{P-L2-AP-2}), Assumptions \ref{L-A-0}.(iii), \ref{L-A-1}.(ii),\ \ref{L-A-3}.(v) and the definition of $h_{o,n}$, \begin{align} \left\vert \mathbb{E}\left[ u_{g_{n}}^{\ast}(\varepsilon)\partial g(\varepsilon)(\varepsilon_{h_{o,n}}-\varepsilon)\right] \right\vert & =\left\vert \mathbb{E}\left[ u_{g_{n}}^{\ast}(\varepsilon)\partial g(\varepsilon)(h_{o}-h_{o,n})\right] \right\vert \nonumber\\ & \leq C(\mathbb{E}\left[ (u_{g_{n}}^{\ast}(\varepsilon))^{2}\right] \mathbb{E}\left[ (h_{o}-h_{o,n})^{2}\right] )^{1/2}\nonumber\\ & \leq C(\mathbb{E}\left[ (h_{o}-h_{o,n})^{2}\right] )^{1/2}=o(n^{-1/2}). \label{P-L2-AP-14a}% \end{align} Combining the results in (\ref{P-L2-AP-12}), (\ref{P-L2-AP-14}) and (\ref{P-L2-AP-14a}), we get% \begin{equation} K_{\psi}(g,h)-K_{\psi}(g^{\ast},h)=\mp\kappa_{n}\Gamma(\alpha_{o})\left[ h-h_{o,n},u_{g_{n}}^{\ast}\right] \pm\kappa_{n}\mathbb{E}\left[ u_{g_{n}% }^{\ast}(\varepsilon)(g(\varepsilon)-g_{o}(\varepsilon))\right] +o(n^{-1}). \label{P-L2-AP-15}% \end{equation} By the definition of $\left\Vert \cdot\right\Vert _{\psi}$ and (\ref{P-L2-AP-2}), \begin{equation} \frac{||g^{\ast}-g_{o}||_{\psi}^{2}-||g-g_{o}||_{\psi}^{2}}{2}=\pm\kappa _{n}\mathbb{E}\left[ u_{g_{n}}^{\ast}(\varepsilon)(g(\varepsilon )-g_{o}(\varepsilon))\right] +o_{p}(n^{-1}) \label{P-L2-AP-16}% \end{equation} which together with (\ref{P-L2-AP-15}) verifies Assumption 3.2.(iii) in HLR.
\end{proof} \bigskip \begin{lemma} \label{L-AP-3} Suppose that Assumptions \ref{L-A-0}, \ref{L-A-1}, \ref{L-A-2} and \ref{L-A-3} hold. Then Assumption 3.3 in HLR holds. \end{lemma} \begin{proof} [Proof of Lemma \ref{L-AP-3}]As the functional value $\rho(g_{o})$ only depends on $g_{o}$, we know that $u_{h_{n}}^{\ast}=0$. By Assumption \ref{L-A-0}.(i),% \begin{equation} \mathbb{E}\left[ (u_{\Gamma_{n}}^{\ast}(x))^{2}\right] \leq\frac{\left\Vert v_{\Gamma_{n}}^{\ast}(x)\right\Vert _{2}^{2}}{\left\Vert v_{\Gamma_{n}}^{\ast }(x)\right\Vert _{2}^{2}C^{-1}+\left\Vert v_{g_{n}}^{\ast}(\varepsilon )u\right\Vert _{2}^{2}}\leq C, \label{P-L3-AP-1}% \end{equation} which together with the H\"{o}lder inequality and Assumption \ref{L-A-0}.(iii) implies that\ \begin{equation} \left\vert \langle h_{o,L}-h_{o},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi }\right\vert \leq\left\Vert h_{o,L}-h_{o}\right\Vert _{2}\left\Vert u_{\Gamma_{n}}^{\ast}\right\Vert _{2}=O(L^{-\rho_{h}}). \label{P-L3-AP-2}% \end{equation} By the definition of $\widehat{h}_{n}$, \begin{equation} \langle\widehat{h}_{n}-h_{o,L},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi }=\mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)^{\prime}\right] \left( R_{n}R_{n}^{\prime}\right) ^{-1}R_{n}(S_{n}-H_{n,L}), \label{P-L3-AP-3}% \end{equation} where $H_{n,L}=\left[ h_{o,L}(x_{1}),\ldots,h_{o,L}(x_{n})\right] ^{\prime}% $. By the Cauchy-Schwarz inequality and the H\"{o}lder inequality, we have% \begin{equation} \left\Vert \mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)\right] \right\Vert ^{2}\leq\mathbb{E}\left[ (u_{\Gamma_{n}}^{\ast}(x))^{2}\right] \mathbb{E}\left[ R(x)^{\prime}R(x)\right] \leq CL \label{P-L3-AP-6}% \end{equation} where the second inequality is by (\ref{P-L3-AP-1}) and Assumption \ref{L-A-0}.(iv). Under Assumptions \ref{L-A-0} and \ref{L-A-3}.(i), we can invoke Lemma 6.2 of Belloni, et al. 
(2015) to get% \begin{equation} \left\Vert Q_{L}-Q_{n,L}\right\Vert =O_{p}(\zeta_{L}(\log L)^{1/2}n^{-1/2}), \label{P-L3-AP-7}% \end{equation} where $Q_{n,L}=n^{-1}R_{n}R_{n}^{\prime}$, which together with Assumption \ref{L-A-3}.(i) implies that \begin{equation} (2C)^{-1}<\omega_{\min}(Q_{n,L})\leq\omega_{\max}(Q_{n,L})<2C\text{ wpa1.} \label{P-L3-AP-8}% \end{equation} By the Cauchy-Schwarz inequality, (\ref{P-L3-AP-6}), (\ref{P-L3-AP-8}) and Assumption \ref{L-A-0}.(iii)% \begin{align} & \left\vert \mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)^{\prime}\right] \left( R_{n}R_{n}^{\prime}\right) ^{-1}R_{n}(H_{n}-H_{n,L})\right\vert ^{2}\nonumber\\ & \leq\left\Vert \mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)\right] \right\Vert ^{2}(H_{n}-H_{n,L})^{\prime}R_{n}^{\prime}\left( R_{n}% R_{n}^{\prime}\right) ^{-2}R_{n}(H_{n}-H_{n,L})\nonumber\\ & \leq O_{p}(Ln^{-1})(H_{n}-H_{n,L})^{\prime}(H_{n}-H_{n,L})=O_{p}% (L^{1-2\rho_{h}}), \label{P-L3-AP-9}% \end{align} which together with $S_{n}-H_{n,L}=(H_{n}-H_{n,L})+e_{n}$ (where $e_{n}=\left[ \varepsilon_{1},\ldots,\varepsilon_{n}\right] ^{\prime}$), (\ref{P-L3-AP-3}) and Assumption \ref{L-A-3}.(v) implies that \begin{equation} \langle\widehat{h}_{n}-h_{o,L},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi }=\mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)^{\prime}\right] \left( R_{n}R_{n}^{\prime}\right) ^{-1}R_{n}e_{n}+o_{p}(n^{-1/2}). \label{P-L3-AP-10}% \end{equation} By Assumptions \ref{L-A-0}.(i)-(ii) and \ref{L-A-0}.(iv), and (\ref{P-L3-AP-8}% ),% \begin{align} \mathbb{E}\left[ \left. \left\Vert n^{-1}Q_{L}^{-1}R_{n}e_{n}\right\Vert ^{2}\right\vert \{x_{i}\}_{i=1}^{n}\right] & =\mathbb{E}\left[ \left. n^{-2}e_{n}^{\prime}R_{n}^{\prime}Q_{L}^{-2}R_{n}e_{n}\right\vert \{x_{i}\}_{i=1}^{n}\right] \nonumber\\ & \leq n^{-2}\omega_{\min}^{-2}(Q_{L})tr\left( R_{n}^{\prime}\mathbb{E}% \left[ \left. 
e_{n}e_{n}^{\prime}\right\vert \{x_{i}\}_{i=1}^{n}\right] R_{n}\right) \nonumber\\ & \leq Cn^{-2}\omega_{\min}^{-2}(Q_{L})tr\left( R_{n}R_{n}^{\prime}\right) \nonumber\\ & \leq Cn^{-1}\omega_{\min}^{-2}(Q_{L})tr\left( Q_{n,L}\right) =O_{p}(Ln^{-1}) \label{P-L3-AP-11}% \end{align} which together with the Markov inequality implies that \begin{equation} \left\Vert n^{-1}Q_{L}^{-1}R_{n}e_{n}\right\Vert =O_{p}(L^{1/2}n^{-1/2}). \label{P-L3-AP-12}% \end{equation} By the Cauchy-Schwarz inequality,% \begin{align} & \left\vert \mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)^{\prime}\right] Q_{n,L}^{-1}\frac{R_{n}e_{n}}{n}-\mathbb{E}\left[ u_{\Gamma_{n}}^{\ast }(x)R(x)^{\prime}\right] n^{-1}Q_{L}^{-1}R_{n}e_{n}\right\vert % \nonumber\\ & =\left\vert \mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)^{\prime}\right] Q_{n,L}^{-1}\left( Q_{n,L}-Q_{L}\right) n^{-1}Q_{L}^{-1}R_{n}e_{n}% \right\vert \nonumber\\ & \leq\left\Vert \mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)^{\prime }\right] Q_{n,L}^{-1}\right\Vert \left\Vert Q_{n,L}-Q_{L}\right\Vert \left\Vert n^{-1}Q_{L}^{-1}R_{n}e_{n}\right\Vert \nonumber\\ & =O_{p}(\zeta_{L}(\log L)^{1/2}Ln^{-1})=o_{p}(n^{-1/2}) \label{P-L3-AP-13}% \end{align} where the second equality is by (\ref{P-L3-AP-6}), (\ref{P-L3-AP-7}), (\ref{P-L3-AP-8}) and (\ref{P-L3-AP-12}), the last equality is by Assumption \ref{L-A-3}.(iv). Collecting the results in (\ref{P-L3-AP-10}) and (\ref{P-L3-AP-13}), we get% \begin{equation} \langle\widehat{h}_{n}-h_{o,L},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi }=\mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)^{\prime}\right] Q_{L}% ^{-1}\frac{R_{n}e_{n}}{n}+o_{p}(n^{-1/2}).
\label{P-L3-AP-14}% \end{equation} By the definition of $u_{\Gamma_{n}}^{\ast}(x)$, \begin{equation} \mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)^{\prime}\right] Q_{L}% ^{-1}\frac{R_{n}e_{n}}{n}=\mathbb{E}\left[ \partial g_{o}(\varepsilon )v_{g_{n}}^{\ast}(\varepsilon)R(x)^{\prime}\right] Q_{L}^{-1}\frac{R_{n}% e_{n}}{n\left\Vert v_{n}^{\ast}\right\Vert _{sd}}, \label{P-L3-AP-15}% \end{equation} and moreover% \begin{equation} \Delta_{\varphi}(Z_{1,i},h_{o})[u_{\Gamma_{n}}^{\ast}]=\mathbb{E}\left[ \partial g_{o}(\varepsilon)v_{g_{n}}^{\ast}(\varepsilon)R(x)^{\prime}\right] Q_{L}^{-1}\frac{R(x_{i})\varepsilon_{i}}{\left\Vert v_{n}^{\ast}\right\Vert _{sd}}. \label{P-L3-AP-16}% \end{equation} Hence we have \begin{equation} \mu_{n}\left\{ \Delta_{\varphi}(Z_{1},h_{o})[u_{\Gamma_{n}}^{\ast}]\right\} =\mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)^{\prime}\right] Q_{L}% ^{-1}\frac{R_{n}e_{n}}{n} \label{P-L3-AP-17}% \end{equation} which together with (\ref{P-L3-AP-2}), (\ref{P-L3-AP-14}) and Assumption \ref{L-A-3}.(v)\ verifies Assumption 3.3.(i) in HLR. By definition, \begin{equation} \Delta_{\varphi}(Z_{1},h_{o})[u_{\Gamma_{n}}^{\ast}]+\Delta_{\psi}(Z_{2}% ,g_{o},h_{o})[u_{g_{n}}^{\ast}]=\frac{v_{\Gamma_{n}}^{\ast}(x)\varepsilon +v_{g_{n}}^{\ast}(\varepsilon)u}{\left\Vert v_{n}^{\ast}\right\Vert _{sd}}. 
\label{P-L3-AP-18}% \end{equation} By the Cauchy-Schwarz inequality, Assumptions \ref{L-A-0}.(iv)-(v), \ref{L-A-1}.(ii) and (\ref{P-LAP-1}),% \begin{align} \frac{\sup_{x\in\mathcal{X}}\left\vert v_{\Gamma_{n}}^{\ast}(x)\right\vert ^{2}}{\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2}} & =\frac{\zeta_{L}^{2}% }{\omega_{\min}^{2}(Q_{L})}\frac{\left\Vert \mathbb{E}\left[ \partial g_{o}(\varepsilon)v_{g_{n}}^{\ast}(\varepsilon)R(x)\right] \right\Vert ^{2}% }{\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2}}\nonumber\\ & \leq\frac{C\zeta_{L}^{2}}{\omega_{\min}^{2}(Q_{L})}\frac{\mathbb{E}\left[ (\partial g_{o}(\varepsilon)v_{g_{n}}^{\ast}(\varepsilon))^{2}\right] \mathbb{E}\left[ R(x)^{\prime}R(x)\right] }{\left\Vert v_{g_{n}}^{\ast }\right\Vert _{2}^{2}}\nonumber\\ & \leq\frac{C\zeta_{L}^{2}\sup_{\varepsilon\in\mathcal{E}}(\partial g_{o}(\varepsilon))^{2}}{\omega_{\min}^{2}(Q_{L})}\frac{\mathbb{E}\left[ (v_{g_{n}}^{\ast}(\varepsilon))^{2}\right] \mathbb{E}\left[ R(x)^{\prime }R(x)\right] }{\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}^{2}}=O(L\zeta _{L}^{2}). 
\label{P-L3-AP-20}% \end{align} By Assumptions \ref{L-A-0}.(ii), \ref{L-A-1}.(i), \ref{L-A-3}.(i) and \ref{L-A-3}.(iv), and the results in (\ref{P-L2-AP-2}), (\ref{P-L2-AP-5B}), (\ref{P-L3-AP-1}) and (\ref{P-L3-AP-20}),% \begin{align} \frac{\mathbb{E}\left[ \left\vert v_{\Gamma_{n}}^{\ast}(x)\varepsilon +v_{g_{n}}^{\ast}(\varepsilon)u\right\vert ^{4}\right] }{n\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{4}} & \leq8\frac{\mathbb{E}\left[ \left\vert v_{\Gamma_{n}}^{\ast}(x)\varepsilon\right\vert ^{4}\right] +\mathbb{E}\left[ \left\vert v_{g_{n}}^{\ast}(\varepsilon)u\right\vert ^{4}\right] }{n\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{4}}\nonumber\\ & \leq C\frac{\mathbb{E}\left[ \left\vert v_{\Gamma_{n}}^{\ast }(x)\right\vert ^{4}\right] +\mathbb{E}\left[ \left\vert v_{g_{n}}^{\ast }(\varepsilon)\right\vert ^{4}\right] }{n\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{4}}\nonumber\\ & \leq Cn^{-1}(\xi_{0,K}^{2}+L\zeta_{L}^{2})\left( \mathbb{E}\left[ \left\vert u_{\Gamma_{n}}^{\ast}(x)\right\vert ^{2}\right] +\mathbb{E}\left[ \left\vert u_{g_{n}}^{\ast}(\varepsilon)\right\vert ^{2}\right] \right) \nonumber\\ & =O(\xi_{0,K}^{2}n^{-1}+L\zeta_{L}^{2}n^{-1})=o(1), \label{P-L3-AP-22}% \end{align} which together with Assumption \ref{L-A-0}.(i) and the Lindeberg CLT verifies Assumption 3.3.(ii) in HLR. The conditions $\varepsilon_{2,n}% =O(\kappa_{n})$ and $\kappa_{n}\delta_{2,n}^{\ast-1}=o(1)$ in Assumption 3.3.(iii) of HLR hold by $\varepsilon_{2,n}=0$ and by $n^{-1/2}\delta _{2,n}^{\ast-1}=O(1)$ respectively. Moreover $||u_{g_{n}}^{\ast}||_{\psi}% ^{2}\leq C$ by the definition of $\left\Vert \cdot\right\Vert _{\psi}$ and (\ref{P-L2-AP-2}). This verifies Assumption 3.3.(iii) in HLR. \end{proof} \bigskip Recall that $\mathcal{N}_{h,n}=\{h\in\mathcal{H}_{n}:\left\Vert h-h_{o}% \right\Vert _{2}\leq\delta_{h,n}^{\ast}\log(\log(n))\}$ and $\mathcal{N}% _{n}=\mathcal{N}_{h,n}\times\mathcal{N}_{g,n}$.
In Section 4 of HLR, we define $\mathcal{W}_{1,n}=\{h\in\mathcal{V}_{1,n}:\left\Vert h\right\Vert _{2}% \leq1\}$ and $\mathcal{W}_{2,n}=\{g\in\mathcal{V}_{2,n}:\left\Vert g\right\Vert _{2}\leq1\}$. \begin{lemma} \label{L-AP-4} Suppose that Assumptions \ref{L-A-0}, \ref{L-A-1}, \ref{L-A-2}, \ref{L-A-3} and \ref{L-A-4} hold. Then Assumptions 4.1 and 4.2 in HLR hold. \end{lemma} \begin{proof} [Proof of Lemma \ref{L-AP-4}]Assumptions 4.1.(i) and 4.1.(ii) in HLR hold by the definition of $\left\langle \cdot,\cdot\right\rangle _{\psi}$. By the Cauchy-Schwarz inequality, \begin{align} \sup_{\alpha\in\mathcal{N}_{n}} & \sup_{v_{g_{1}},v_{g_{2}}\in \mathcal{W}_{2,n}}\left\vert n^{-1}\sum_{i=1}^{n}r_{\psi}(Z_{2,i}% ,\alpha)[v_{g_{1}},v_{g_{2}}]-\mathbb{E}\left[ r_{\psi}(Z_{2},\alpha _{o})[v_{g_{1}},v_{g_{2}}]\right] \right\vert \nonumber\\ & =\sup_{v_{g_{1}},v_{g_{2}}\in\mathcal{W}_{2,n}}\left\vert n^{-1}\sum _{i=1}^{n}v_{g_{1}}(\varepsilon_{i})v_{g_{2}}(\varepsilon_{i})-\mathbb{E}% \left[ v_{g_{1}}(\varepsilon)v_{g_{2}}(\varepsilon)\right] \right\vert \nonumber\\ & \leq\left\Vert Q_{n,K}-Q_{K}\right\Vert =O_{p}(\xi_{0,K}(\log K)^{1/2}n^{-1/2})=o_{p}(1) \label{P-L4-AP-1}% \end{align} where the second equality is by (\ref{P-AP-AL1-0}), the third equality is by Assumption \ref{L-A-3}.(i). This means that Assumption 4.1.(iii) in HLR holds. Assumption 4.1.(iv) in HLR is assumed in Assumption \ref{L-A-2}.(iv). This\ verifies Assumption 4.1 in HLR. Assumptions 4.2.(i) and 4.2.(ii) in HLR hold by the definition of $\left\langle \cdot,\cdot\right\rangle _{\varphi}$. 
By the Cauchy-Schwarz inequality,% \begin{align} \sup_{h\in\mathcal{N}_{h,n}} & \sup_{v_{h_{1}},v_{h_{2}}\in\mathcal{W}% _{1,n}}\left\vert n^{-1}\sum_{i=1}^{n}r_{\varphi}(Z_{1,i},h)[v_{h_{1}% },v_{h_{2}}]-\mathbb{E}\left[ r_{\varphi}(Z_{1},h_{o})[v_{h_{1}},v_{h_{2}% }]\right] \right\vert \nonumber\\ & =\sup_{v_{h_{1}},v_{h_{2}}\in\mathcal{W}_{1,n}}\left\vert n^{-1}\sum _{i=1}^{n}v_{h_{1}}(x_{i})v_{h_{2}}(x_{i})-\mathbb{E}\left[ v_{h_{1}% }(x)v_{h_{2}}(x)\right] \right\vert \nonumber\\ & \leq\left\Vert Q_{n,L}-Q_{L}\right\Vert =O_{p}(\zeta_{L}(\log L)^{1/2}n^{-1/2})=o_{p}(1) \label{P-L4-AP-2}% \end{align} where the second equality is by (\ref{P-L3-AP-7}), and the third equality is by Assumption \ref{L-A-3}.(i). This means that Assumption 4.2.(iii) in HLR holds. As $\partial\rho(\alpha)[v_{h}]=0$ for any $\alpha$ in this example, Assumption 4.2.(iv) in HLR holds. Under Assumptions \ref{L-A-3}.(v) and \ref{L-A-4},% \begin{equation} \xi_{1,K}\delta_{2,n}\leq\xi_{1,K}(K^{1/2}n^{-1/2}+K^{-\rho_{g}}% +\upsilon_{1,K}L^{1/2}n^{-1/2})\log(\log(n))=o(1). \label{P-L4-AP-2A}% \end{equation} By definition, for any $\alpha\in\mathcal{N}_{n}$, we have \begin{align} & \Gamma_{n}(\alpha)\left[ v_{h},v_{g}\right] -\Gamma(\alpha_{o})\left[ v_{h},v_{g}\right] \nonumber\\ & =n^{-1}\sum_{i=1}^{n}\left[ \partial g(\varepsilon_{h,i})-\partial g(\varepsilon_{i})\right] v_{h}(x_{i})v_{g}(\varepsilon_{i})\nonumber\\ & +n^{-1}\sum_{i=1}^{n}\left[ \partial g(\varepsilon_{i})-\partial g_{o}(\varepsilon_{i})\right] v_{h}(x_{i})v_{g}(\varepsilon_{i})\nonumber\\ & +n^{-1}\sum_{i=1}^{n}\partial g_{o}(\varepsilon_{i})v_{h}(x_{i}% )v_{g}(\varepsilon_{i})-\mathbb{E}\left[ \partial g_{o}(\varepsilon )v_{h}(x)v_{g}(\varepsilon)\right] . 
\label{P-L4-AP-3}% \end{align} By the Cauchy-Schwarz inequality, \begin{align} & \sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{W}_{2,n}\text{ }% }\left\vert n^{-1}\sum_{i=1}^{n}\left\vert v_{h}(x_{i})v_{g}(\varepsilon _{i})\right\vert \right\vert ^{2}\nonumber\\ & \leq\sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{W}_{2,n}\text{ }}\left[ n^{-1}\sum_{i=1}^{n}\left\vert v_{h}(x_{i})\right\vert ^{2}\times n^{-1}\sum_{i=1}^{n}\left\vert v_{g}(\varepsilon_{i})\right\vert ^{2}\right] \nonumber\\ & \leq\left\Vert Q_{L,n}\right\Vert \left\Vert Q_{K,n}\right\Vert =O_{p}(1) \label{P-L4-AP-4}% \end{align} where the equality is by Assumptions \ref{L-A-0}.(iv), \ref{L-A-1}.(iv), and results in (\ref{P-AP-AL1-0}) and (\ref{P-L3-AP-7}). Recall that $\mathcal{B}_{2,n}^{\ast}\equiv\{v\in\mathcal{V}_{2,n}:\left\Vert v-v_{g_{n}% }^{\ast}\right\Vert _{\psi}\left\Vert v_{g_{n}}^{\ast}\right\Vert _{\psi}% ^{-1}\leq\delta_{v_{g},n}\}$, where $\delta_{v_{g},n}=o(1)$ is some positive sequence such that $\widehat{v}_{g_{n}}^{\ast}\in\mathcal{B}_{2,n}^{\ast}$ wpa1. For any $v_{g}\in\mathcal{B}_{2,n}^{\ast}$, we have \begin{equation} \left\vert \left\Vert v_{g}\right\Vert _{2}\left\Vert v_{g_{n}}^{\ast }\right\Vert _{2}^{-1}-1\right\vert \leq\left\Vert v_{g}-v_{g_{n}}^{\ast }\right\Vert _{2}\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}^{-1}=o(1) \label{P-L4-AP-6}% \end{equation} which implies that \begin{equation} \sup_{v_{g}\in\mathcal{B}_{2,n}^{\ast}}\left\Vert v_{g}\right\Vert _{2}\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}^{-1}\leq2 \label{P-L4-AP-7}% \end{equation} for all large $n$. 
By (\ref{P-L4-AP-7}),\ the mean value expansion, the triangle inequality and the Cauchy-Schwarz inequality,% \begin{align} & \sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{B}_{2,n}^{\ast }\text{ }}\left\vert n^{-1}\sum_{i=1}^{n}\left[ \partial g(\varepsilon _{h,i})-\partial g(\varepsilon_{i})\right] v_{h}(x_{i})v_{g}(\varepsilon _{i})\right\vert \nonumber\\ & \leq2\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}\sup_{v_{h}\in \mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{W}_{2,n}\text{ }}\left\vert n^{-1}\sum_{i=1}^{n}\partial P(\widetilde{\varepsilon}_{h,i})^{\prime}% (\beta-\beta_{o,K})(\varepsilon_{h,i}-\varepsilon_{i})v_{h}(x_{i}% )v_{g}(\varepsilon_{i})\right\vert \nonumber\\ & +2\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}\sup_{v_{h}\in\mathcal{W}% _{1,n}\text{,}v_{g}\in\mathcal{W}_{2,n}\text{ }}\left\vert n^{-1}\sum _{i=1}^{n}\partial P(\widetilde{\varepsilon}_{h,i})^{\prime}\beta _{o,K}(\varepsilon_{h,i}-\varepsilon_{i})v_{h}(x_{i})v_{g}(\varepsilon _{i})\right\vert \nonumber\\ & \leq C\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}\left[ \xi_{1,K}% \left\Vert \beta-\beta_{o,K}\right\Vert +\upsilon_{1,K}\right] \zeta _{L}\delta_{1,n}\left( \sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}% \in\mathcal{W}_{2,n}\text{ }}n^{-1}\sum_{i=1}^{n}\left\vert v_{h}(x_{i}% )v_{g}(\varepsilon_{i})\right\vert \right) \nonumber\\ & \leq C\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}\left[ \xi_{1,K}% \delta_{2,n}+\upsilon_{1,K}\right] \zeta_{L}\delta_{1,n}\left( \sup _{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{W}_{2,n}\text{ }}% n^{-1}\sum_{i=1}^{n}\left\vert v_{h}(x_{i})v_{g}(\varepsilon_{i})\right\vert \right) , \label{P-L4-AP-8}% \end{align} for any $h\in\mathcal{N}_{h,n}$ and any $g\in\mathcal{N}_{g,n}$, where the second inequality is by (\ref{P-L2-AP-3A}), the third inequality is by (\ref{P-L2-AP-12D}% ). Equation (\ref{P-L4-AP-8}) together with Assumptions \ref{L-A-4}.(iii)-(v), (\ref{P-L4-AP-2A}), and (\ref{P-L4-AP-4}) implies that% \begin{align} &
\sup_{\alpha\in\mathcal{N}_{n}}\sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}% v_{g}\in\mathcal{B}_{2,n}^{\ast}\text{ }}\left\vert n^{-1}\sum_{i=1}% ^{n}\left[ \partial g(\varepsilon_{h,i})-\partial g(\varepsilon_{i})\right] v_{h}(x_{i})v_{g}(\varepsilon_{i})\right\vert \nonumber\\ & =O_{p}((\xi_{1,K}\delta_{2,n}+\upsilon_{1,K})\zeta_{L}\delta_{1,n}% )\nonumber\\ & =O_{p}(n^{-1/2}L^{1/2}\zeta_{L}\xi_{1,K}(n^{-1/2}K^{1/2}+K^{-\rho_{g}% }+\upsilon_{1,K}n^{-1/2}L^{1/2})+n^{-1/2}L^{1/2}\zeta_{L}\upsilon_{1,K}% )=o_{p}(1). \label{P-L4-AP-9}% \end{align} By the triangle inequality, the Cauchy-Schwarz inequality, Assumption \ref{L-A-1}.(iii) and \ref{L-A-1}.(v) \begin{align} \sup_{\varepsilon\in\mathcal{E}}\left\vert \partial g(\varepsilon)-\partial g_{o}(\varepsilon)\right\vert & \leq\sup_{\varepsilon\in\mathcal{E}% }\left\vert \partial g(\varepsilon)-\partial g_{o,K}(\varepsilon)\right\vert +\sup_{\varepsilon\in\mathcal{E}}\left\vert \partial g_{o,K}(\varepsilon )-\partial g_{o}(\varepsilon)\right\vert \nonumber\\ & \leq\xi_{1,K}\left\Vert \beta-\beta_{o,K}\right\Vert +K^{-\rho_{g}}, \label{P-L4-AP-10}% \end{align} which together with the definition of $\mathcal{N}_{n}$ and (\ref{P-L2-AP-4A}) implies that \begin{equation} \sup_{g\in\mathcal{N}_{g,n}}\sup_{\varepsilon\in\mathcal{E}}\left\vert \partial g(\varepsilon)-\partial g_{o}(\varepsilon)\right\vert \leq C\xi _{1,K}\delta_{2,n}+K^{-\rho_{g}}=o(1) \label{P-L4-AP-11}% \end{equation} where the equality is by Assumption \ref{L-A-3}.(v) and (\ref{P-L4-AP-2A}). 
Using (\ref{P-L4-AP-11}) and the triangle inequality% \begin{align} & \sup_{g\in\mathcal{N}_{g,n}}\sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}% \in\mathcal{B}_{2,n}^{\ast}\text{ }}\left\vert n^{-1}\sum_{i=1}^{n}\left[ \partial g(\varepsilon_{i})-\partial g_{o}(\varepsilon_{i})\right] v_{h}(x_{i})v_{g}(\varepsilon_{i})\right\vert \nonumber\\ & \leq\sup_{g\in\mathcal{N}_{g,n}}\sup_{\varepsilon\in\mathcal{E}}\left\vert \partial g(\varepsilon)-\partial g_{o}(\varepsilon)\right\vert \times \sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{B}_{2,n}^{\ast}\text{ }}n^{-1}\sum_{i=1}^{n}\left\vert v_{h}(x_{i})v_{g}(\varepsilon_{i})\right\vert \nonumber\\ & \leq C\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}(\xi_{1,K}\delta _{2,n}+K^{-\rho_{g}})\left( \sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}% \in\mathcal{W}_{2,n}\text{ }}n^{-1}\sum_{i=1}^{n}\left\vert v_{h}(x_{i}% )v_{g}(\varepsilon_{i})\right\vert \right) =o_{p}(1) \label{P-L4-AP-12}% \end{align} where the equality is by Assumption \ref{L-A-4}.(i) and (\ref{P-L4-AP-4}). By Assumptions \ref{L-A-0}.(i), \ref{L-A-0}.(v) \ref{L-A-1}.(ii), \ref{L-A-1}% .(iv) and the Cauchy-Schwarz inequality, \begin{equation} \mathbb{E}\left[ \left\Vert \mu_{n}\left\{ \partial g_{o}(\varepsilon )R(x)P(\varepsilon)^{\prime}\right\} \right\Vert ^{2}\right] \leq n^{-1}\mathbb{E}\left[ \left\vert \partial g_{o}(\varepsilon)\right\vert ^{2}\left\vert P(\varepsilon)^{\prime}R(x)\right\vert ^{2}\right] \leq CK\zeta_{L}n^{-1}=o(1) \label{P-L4-AP-13}% \end{equation} where the equality is by Assumption \ref{L-A-3}.(i). 
By the Cauchy-Schwarz inequality, \begin{align} & \sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{B}_{2,n}^{\ast}% }\left\vert \mu_{n}\left\{ \partial g_{o}(\varepsilon)v_{h}(x)v_{g}% (\varepsilon)\right\} \right\vert \nonumber\\ & \leq2\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}\sup_{v_{h}\in \mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{W}_{2,n}\text{ }}\left\vert \mu _{n}\left\{ \partial g_{o}(\varepsilon)v_{h}(x)v_{g}(\varepsilon)\right\} \right\vert \nonumber\\ & \leq2\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}\left\Vert \mu_{n}\left\{ \partial g_{o}(\varepsilon)R(x)P(\varepsilon)^{\prime}\right\} \right\Vert =o_{p}(1) \label{P-L4-AP-14}% \end{align} where the equality is by Assumption \ref{L-A-4}.(i), (\ref{P-L4-AP-13}) and the Markov inequality. Collecting the results in (\ref{P-L4-AP-3}), (\ref{P-L4-AP-9}), (\ref{P-L4-AP-12}) and (\ref{P-L4-AP-14}), we get% \begin{equation} \sup_{\alpha\in\mathcal{N}_{n}}\sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}% \in\mathcal{B}_{2,n}^{\ast}}\left\vert \Gamma_{n}(\alpha)\left[ v_{h}% ,v_{g}\right] -\Gamma(\alpha_{o})\left[ v_{h},v_{g}\right] \right\vert =o_{p}(1). 
\label{P-L4-AP-15}% \end{equation} By the H\"{o}lder inequality and Assumption \ref{L-A-1}.(ii)% \begin{align} & \sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{B}_{2,n}^{\ast}% }\left\vert \Gamma(\alpha_{o})\left[ v_{h},v_{g}-v_{g_{n}}^{\ast}\right] \right\vert \nonumber\\ & \leq C\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}\sup_{v_{h}\in \mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{B}_{2,n}^{\ast}}\left[ \left\Vert v_{h}\right\Vert _{2}\left\Vert v_{g}-v_{g_{n}}^{\ast}\right\Vert _{2}\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}^{-1}\right] \nonumber\\ & \leq C\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}\left\Vert Q_{L}% \right\Vert \sup_{v_{g}\in\mathcal{B}_{2,n}^{\ast}}\left\Vert v_{g}-v_{g_{n}% }^{\ast}\right\Vert _{2}\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}^{-1}=o(1) \label{P-L4-AP-16}% \end{align} where the equality is by Assumption \ref{L-A-4}.(i), Assumption \ref{L-A-0}% .(iv) and the definition of $\mathcal{B}_{2,n}^{\ast}$. Combining the results in (\ref{P-L4-AP-15}) and (\ref{P-L4-AP-16}), we verify Assumption 4.2.(v) in HLR. \end{proof} \bigskip \begin{lemma} \label{L-AP-5} Suppose that Assumptions \ref{L-A-0}, \ref{L-A-1}, \ref{L-A-2}, \ref{L-A-3} and \ref{L-A-4} hold. Then Lemma C.3 in HLR holds. \end{lemma} \begin{proof} [Proof of Lemma \ref{L-AP-5}]By definition, for any $h\in\mathcal{N}_{h,n}$,% \begin{equation} \Delta_{\varphi}^{2}(Z_{1},h)[v_{h}]=\varepsilon^{2}v_{h}^{2}(x)+(h(x)-h_{o}% (x))^{2}v_{h}^{2}(x)-2\varepsilon v_{h}^{2}(x)(h(x)-h_{o}(x)). \label{P-L5-AP-1}% \end{equation} By the definitions of $\mathcal{W}_{1,n}$ and the\ operator norm,% \begin{equation} \sup_{v_{h}\in\mathcal{W}_{1,n}}\left\vert \mu_{n}\left\{ \varepsilon ^{2}v_{h}^{2}(x)\right\} \right\vert \leq\left\Vert \mu_{n}\left\{ \varepsilon^{2}R(x)R(x)^{\prime}\right\} \right\Vert .
\label{P-L5-AP-2}% \end{equation} By Assumption \ref{L-A-0} and the Cauchy-Schwarz inequality,% \begin{equation} \mathbb{E}\left[ \left\Vert \mu_{n}\left\{ \varepsilon^{2}R(x)R(x)^{\prime }\right\} \right\Vert ^{2}\right] \leq n^{-1}\mathbb{E}\left[ \varepsilon^{4}\left\vert R(x)^{\prime}R(x)\right\vert ^{2}\right] \leq L\zeta_{L}^{2}n^{-1} \label{P-L5-AP-3}% \end{equation} which together with (\ref{P-L5-AP-2}), the Markov inequality and Assumption \ref{L-A-3}.(i) implies that \begin{equation} \sup_{v_{h}\in\mathcal{W}_{1,n}}\left\vert \mu_{n}\left\{ \varepsilon ^{2}v_{h}^{2}(x)\right\} \right\vert =o_{p}(1). \label{P-L5-AP-4}% \end{equation} By the definition of $\mathcal{N}_{h,n}$, \begin{align} & \sup_{h\in\mathcal{N}_{h,n}}\sup_{v_{h}\in\mathcal{W}_{1,n}}n^{-1}% \sum_{i=1}^{n}(h(x_{i})-h_{o}(x_{i}))^{2}v_{h}^{2}(x_{i})\nonumber\\ & \leq\left( \sup_{h\in\mathcal{N}_{h,n}}\left\Vert h-h_{o}\right\Vert _{\infty}^{2}\right) \left( \sup_{v_{h}\in\mathcal{W}_{1,n}}n^{-1}\sum _{i=1}^{n}v_{h}^{2}(x_{i})\right) =O_{p}(\zeta_{L}^{2}\delta_{h,n}^{\ast2}% )=o_{p}(1) \label{P-L5-AP-5}% \end{align} where $\delta_{h,n}^{\ast2}=Ln^{-1}+L^{-2\rho_{h}}$, the first equality is by (\ref{P-L2-AP-3A}) and% \begin{equation} \sup_{v_{h}\in\mathcal{W}_{1,n}}n^{-1}\sum_{i=1}^{n}v_{h}^{2}(x_{i})=O_{p}(1), \label{P-L5-AP-6}% \end{equation} which follows by arguments in showing (\ref{P-L4-AP-4}), the second equality is by Assumptions \ref{L-A-3}.(i) and \ref{L-A-3}.(v). By the Cauchy-Schwarz inequality,% \begin{align} & \sup_{h\in\mathcal{N}_{h,n}}\sup_{v_{h}\in\mathcal{W}_{1,n}}\left\vert n^{-1}\sum_{i=1}^{n}\varepsilon_{i}v_{h}^{2}(x_{i})(h(x_{i})-h_{o}% (x_{i}))\right\vert ^{2}\nonumber\\ & \leq\left( \sup_{h\in\mathcal{N}_{h,n}}\left\Vert h-h_{o}\right\Vert _{\infty}^{2}\right) \left( \sup_{v_{h}\in\mathcal{W}_{1,n}}n^{-1}\sum _{i=1}^{n}\varepsilon_{i}^{2}v_{h}^{2}(x_{i})\right) \left( \sup_{v_{h}% \in\mathcal{W}_{1,n}}n^{-1}\sum_{i=1}^{n}v_{h}^{2}(x_{i})\right) .
\label{P-L5-AP-7}% \end{align} By Assumptions \ref{L-A-0}.(ii) and \ref{L-A-0}.(iv), \begin{equation} \left\vert \sup_{v_{h}\in\mathcal{W}_{1,n}}\mathbb{E}\left[ \varepsilon _{i}^{2}v_{h}^{2}(x_{i})\right] \right\vert \leq\left\Vert \mathbb{E}\left[ \varepsilon^{2}R(x)R(x)^{\prime}\right] \right\Vert \leq C \label{P-L5-AP-8}% \end{equation} which together with (\ref{P-L5-AP-4}) implies that \begin{equation} \sup_{v_{h}\in\mathcal{W}_{1,n}}n^{-1}\sum_{i=1}^{n}\varepsilon_{i}^{2}% v_{h}^{2}(x_{i})=O_{p}(1). \label{P-L5-AP-9}% \end{equation} By (\ref{P-L5-AP-6}), (\ref{P-L5-AP-7}), (\ref{P-L5-AP-9}) and the definition of $\mathcal{N}_{h,n}$, \begin{equation} \sup_{h\in\mathcal{N}_{h,n}}\sup_{v_{h}\in\mathcal{W}_{1,n}}\left\vert n^{-1}\sum_{i=1}^{n}\varepsilon_{i}v_{h}^{2}(x_{i})(h(x_{i})-h_{o}% (x_{i}))\right\vert ^{2}=O_{p}(\zeta_{L}^{2}\delta_{h,n}^{\ast2})=o_{p}(1) \label{P-L5-AP-10}% \end{equation} where $\delta_{h,n}^{\ast2}=Ln^{-1}+L^{-2\rho_{h}}$, the second equality is by Assumptions \ref{L-A-3}.(i) and \ref{L-A-3}.(v). Collecting the results in (\ref{P-L5-AP-1}), (\ref{P-L5-AP-4}), (\ref{P-L5-AP-5}) and (\ref{P-L5-AP-10}% ), we show that Lemma C.3.(i) in HLR holds. By definition% \begin{align} & n^{-1}\sum_{i=1}^{n}\Delta_{\psi}^{2}(Z_{2,i},\alpha)[v_{g}]-\mathbb{E}% \left[ \Delta_{\psi}^{2}(Z_{2},\alpha_{o})[v_{g}]\right] \nonumber\\ & =\mu_{n}\left\{ u^{2}v_{g}^{2}(\varepsilon)\right\} +n^{-1}\sum_{i=1}% ^{n}(g(\varepsilon_{h,i})-g_{o}(\varepsilon_{i}))^{2}v_{g}^{2}(\varepsilon _{i})\nonumber\\ & -2n^{-1}\sum_{i=1}^{n}u_{i}(g(\varepsilon_{h,i})-g_{o}(\varepsilon _{i}))v_{g}^{2}(\varepsilon_{i}). \label{P-L5-AP-11}% \end{align} Using arguments similar to those used to show (\ref{P-L5-AP-4}), we can show that% \begin{equation} \sup_{v_{g}\in\mathcal{W}_{2,n}}\mu_{n}\left\{ u^{2}v_{g}^{2}(\varepsilon )\right\} =O_{p}(K\xi_{0,K}n^{-1})=o_{p}(1), \label{P-L5-AP-12}% \end{equation} where the equality is by Assumption \ref{L-A-3}.(i).
By (\ref{P-L2-AP-4}) and (\ref{P-L2-AP-5}),% \begin{align} & \sup_{\alpha\in\mathcal{N}_{n}}\sup_{v_{g}\in\mathcal{W}_{2,n}}n^{-1}% \sum_{i=1}^{n}(g(\varepsilon_{h,i})-g_{o}(\varepsilon_{i}))^{2}v_{g}% ^{2}(\varepsilon_{i})\nonumber\\ & \leq C(\zeta_{L}^{2}+\xi_{0,K}^{2})\delta_{2,n}^{2}\sup_{v_{g}% \in\mathcal{W}_{2,n}}n^{-1}\sum_{i=1}^{n}v_{g}^{2}(\varepsilon_{i})\nonumber\\ & =o(1)\sup_{v_{g}\in\mathcal{W}_{2,n}}n^{-1}\sum_{i=1}^{n}v_{g}% ^{2}(\varepsilon_{i})=o_{p}(1) \label{P-L5-AP-13}% \end{align} where the first equality is by% \begin{equation} (\zeta_{L}^{2}+\xi_{0,K}^{2})\delta_{2,n}^{2}=o(1), \label{P-L5-AP-13A}% \end{equation} which is implied by Assumption \ref{L-A-3}.(i), \ref{L-A-3}.(v) and $(\zeta_{L}^{2}+\xi_{0,K}^{2})\upsilon_{1,K}^{2}Ln^{-1}=o(1)$ (which is implied by Assumption \ref{L-A-4}), the second equality in (\ref{P-L5-AP-13}) is by% \begin{equation} \sup_{v_{g}\in\mathcal{W}_{2,n}}n^{-1}\sum_{i=1}^{n}v_{g}^{2}(\varepsilon _{i})=O_{p}(1) \label{P-L5-AP-14}% \end{equation} which follows by arguments in showing (\ref{P-L4-AP-4}). 
Similarly by (\ref{P-L2-AP-4}) and (\ref{P-L2-AP-5}),% \begin{align} & \sup_{\alpha\in\mathcal{N}_{n}}\sup_{v_{g}\in\mathcal{W}_{2,n}}\left\vert n^{-1}\sum_{i=1}^{n}u_{i}(g(\varepsilon_{h,i})-g_{o}(\varepsilon_{i}% ))v_{g}^{2}(\varepsilon_{i})\right\vert ^{2}\nonumber\\ & \leq C(\zeta_{L}^{2}+\xi_{0,K}^{2})\delta_{2,n}^{2}\sup_{v_{g}% \in\mathcal{W}_{2,n}}n^{-1}\sum_{i=1}^{n}u_{i}^{2}v_{g}^{2}(\varepsilon _{i})\sup_{v_{g}\in\mathcal{W}_{2,n}}n^{-1}\sum_{i=1}^{n}v_{g}^{2}% (\varepsilon_{i})\nonumber\\ & =o_{p}(1)\sup_{v_{g}\in\mathcal{W}_{2,n}}n^{-1}\sum_{i=1}^{n}u_{i}^{2}% v_{g}^{2}(\varepsilon_{i})=o_{p}(1) \label{P-L5-AP-15}% \end{align} where the first equality is by (\ref{P-L5-AP-13A}) and (\ref{P-L5-AP-14}), the second equality is by \begin{equation} \sup_{v_{g}\in\mathcal{W}_{2,n}}n^{-1}\sum_{i=1}^{n}u_{i}^{2}v_{g}% ^{2}(\varepsilon_{i})=O_{p}(1) \label{P-L5-AP-16}% \end{equation} which follows by similar arguments in showing (\ref{P-L5-AP-9}). Collecting the results in (\ref{P-L5-AP-11}), (\ref{P-L5-AP-12}), (\ref{P-L5-AP-13}) and (\ref{P-L5-AP-15}), we show that Lemma C.3.(ii) in HLR holds. By definition% \begin{align} & \Delta_{\varphi}(Z_{1},h)[v_{h}]\Delta_{\psi}(Z_{2},\alpha)[v_{g}% ]-\mathbb{E}_{Z}\left[ \Delta_{\varphi}(Z_{1},h_{o})[v_{h}]\Delta_{\psi }(Z_{2},\alpha_{o})[v_{g}]\right] \nonumber\\ & =u\varepsilon v_{g}(\varepsilon)v_{h}(x)-\mathbb{E}\left[ u\varepsilon v_{g}(\varepsilon)v_{h}(x)\right] \nonumber\\ & +(h(x)-h_{o}(x))uv_{h}(x)v_{g}(\varepsilon)+(g(\varepsilon_{h}% )-g_{o}(\varepsilon))\varepsilon v_{h}(x)v_{g}(\varepsilon)\nonumber\\ & +(g(\varepsilon_{h})-g_{o}(\varepsilon))(h(x)-h_{o}(x))v_{h}(x)v_{g}% (\varepsilon), \label{P-L5-AP-17}% \end{align} for any $\alpha\in\mathcal{N}_{n}$. 
By the Cauchy-Schwarz inequality and Assumptions \ref{L-A-0}.(i)-(ii), \ref{L-A-0}.(v), \ref{L-A-1}.(i) and \ref{L-A-1}.(v),% \begin{align} \mathbb{E}\left[ \left\Vert \mu_{n}\left\{ u\varepsilon R(x)P(\varepsilon )^{\prime}\right\} \right\Vert ^{2}\right] & =n^{-1}\mathbb{E}\left[ u^{2}\varepsilon^{2}P(\varepsilon)^{\prime}P(\varepsilon)R(x)^{\prime }R(x)\right] \nonumber\\ & \leq n^{-1}\sqrt{\mathbb{E}\left[ \left( u^{2}P(\varepsilon)^{\prime }P(\varepsilon)\right) ^{2}\right] }\sqrt{\mathbb{E}\left[ \left( \varepsilon^{2}R(x)^{\prime}R(x)\right) ^{2}\right] }\nonumber\\ & \leq n^{-1}\sqrt{\xi_{0,K}^{2}\mathbb{E}\left[ P(\varepsilon)^{\prime }P(\varepsilon)\right] }\sqrt{\zeta_{L}^{2}\mathbb{E}\left[ R(x)^{\prime }R(x)\right] }\nonumber\\ & \leq Cn^{-1}\zeta_{L}\xi_{0,K}L^{1/2}K^{1/2}\leq Cn^{-1}(L+K)(\zeta_{L}% ^{2}+K^{2})=o(1), \label{P-L5-AP-18}% \end{align} where the third inequality is by $\mathbb{E}\left[ P(\varepsilon)^{\prime }P(\varepsilon)\right] \leq tr(Q_{K})=O(K)$ and $\mathbb{E}\left[ R(x)^{\prime}R(x)\right] \leq tr(Q_{L})=O(L)$, and the last equality is by Assumption \ref{L-A-3}.(i). By the Cauchy-Schwarz inequality, the Markov inequality and (\ref{P-L5-AP-18}), we have% \begin{equation} \sup_{v_{h}\in\mathcal{W}_{1,n},v_{g}\in\mathcal{W}_{2,n}}\mu_{n}\left\{ u\varepsilon v_{g}(\varepsilon)v_{h}(x)\right\} =o_{p}(1). 
\label{P-L5-AP-19}% \end{equation} By (\ref{P-L2-AP-4}), (\ref{P-L2-AP-5}) and the Cauchy-Schwarz inequality,% \begin{align} & \sup_{\alpha\in\mathcal{N}_{n}}\sup_{v_{h}\in\mathcal{W}_{1,n},v_{g}% \in\mathcal{W}_{2,n}}\left\vert n^{-1}\sum_{i=1}^{n}(g(\varepsilon _{h,i})-g_{o}(\varepsilon_{i}))\varepsilon_{i}v_{h}(x_{i})v_{g}(\varepsilon _{i})\right\vert ^{2}\nonumber\\ & \leq C(\zeta_{L}^{2}+\xi_{0,K}^{2})\delta_{2,n}^{2}\left( \sup_{v_{h}% \in\mathcal{W}_{1,n}}n^{-1}\sum_{i=1}^{n}\varepsilon_{i}^{2}v_{h}^{2}% (x_{i})\right) \left( \sup_{v_{g}\in\mathcal{W}_{2,n}}n^{-1}\sum_{i=1}% ^{n}v_{g}^{2}(\varepsilon_{i})\right) =o_{p}(1) \label{P-L5-AP-20}% \end{align} where the equality is by (\ref{P-L5-AP-13A}), (\ref{P-L5-AP-9})\ and (\ref{P-L5-AP-14}). By the Cauchy-Schwarz inequality,% \begin{align} & \sup_{h\in\mathcal{N}_{h,n}}\sup_{v_{h}\in\mathcal{W}_{1,n},v_{g}% \in\mathcal{W}_{2,n}}\left\vert n^{-1}\sum_{i=1}^{n}(h(x_{i})-h_{o}% (x_{i}))u_{i}v_{h}(x_{i})v_{g}(\varepsilon_{i})\right\vert ^{2}\nonumber\\ & \leq\sup_{h\in\mathcal{N}_{h,n}}\left\Vert h-h_{o}\right\Vert _{\infty}% ^{2}\left( \sup_{v_{h}\in\mathcal{W}_{1,n}}n^{-1}\sum_{i=1}^{n}v_{h}% ^{2}(x_{i})\right) \left( \sup_{v_{g}\in\mathcal{W}_{2,n}}n^{-1}\sum _{i=1}^{n}u_{i}^{2}v_{g}^{2}(\varepsilon_{i})\right) =o_{p}(1) \label{P-L5-AP-21}% \end{align} where the equality is by (\ref{P-L5-AP-6}), (\ref{P-L5-AP-16}) and $\sup _{h\in\mathcal{N}_{h,n}}\left\Vert h-h_{o}\right\Vert _{\infty}^{2}=\zeta _{L}^{2}\delta_{1,n}^{2}=o(1)$ which is implied by Assumption \ref{L-A-3}.(i). 
Similarly, \begin{align} & \sup_{\alpha\in\mathcal{N}_{n}}\sup_{v_{h}\in\mathcal{W}_{1,n},v_{g}% \in\mathcal{W}_{2,n}}\left\vert n^{-1}\sum_{i=1}^{n}(h(x_{i})-h_{o}% (x_{i}))(g(\varepsilon_{h,i})-g_{o}(\varepsilon_{i}))v_{h}(x_{i}% )v_{g}(\varepsilon_{i})\right\vert ^{2}\nonumber\\ & \leq C(\zeta_{L}^{2}+\xi_{0,K}^{2})\delta_{2,n}^{2}\sup_{h\in \mathcal{N}_{h,n}}\left\Vert h-h_{o}\right\Vert _{\infty}^{2}\nonumber\\ & \times\left( \sup_{v_{h}\in\mathcal{W}_{1,n}}n^{-1}\sum_{i=1}^{n}v_{h}% ^{2}(x_{i})\right) \left( \sup_{v_{g}\in\mathcal{W}_{2,n}}n^{-1}\sum _{i=1}^{n}v_{g}^{2}(\varepsilon_{i})\right) \nonumber\\ & =O_{p}((\zeta_{L}^{2}+\xi_{0,K}^{2})\zeta_{L}^{2}\delta_{1,n}^{2}% \delta_{2,n}^{2})=o_{p}(1) \label{P-L5-AP-22}% \end{align} where the first equality is by (\ref{P-L5-AP-6}), (\ref{P-L5-AP-14}) and $\sup_{h\in\mathcal{N}_{h,n}}\left\Vert h-h_{o}\right\Vert _{\infty}^{2}% =\zeta_{L}^{2}\delta_{1,n}^{2}$, the second equality is by (\ref{P-L5-AP-13A}% ), and $\zeta_{L}^{2}\delta_{1,n}^{2}=o(1)$ which is implied by Assumption \ref{L-A-3}.(i). Collecting the results in (\ref{P-L5-AP-17}), (\ref{P-L5-AP-19}), (\ref{P-L5-AP-20}), (\ref{P-L5-AP-21}) and (\ref{P-L5-AP-22}), we show that Lemma C.3.(iii) in HLR holds. \end{proof} \bigskip \begin{lemma} \label{L-AP-6} Suppose that Assumptions \ref{L-A-0}, \ref{L-A-1}, \ref{L-A-2} and \ref{L-A-3} hold. Then Assumption 4.3.(iv) in HLR holds. \end{lemma} \begin{proof} [Proof of Lemma \ref{L-AP-6}]By the definition of $\Delta_{\varphi}% (Z_{1},h_{o})[v_{h}]$, we have% \begin{equation} \sup_{v_{h}\in\mathcal{W}_{1,n}}\mathbb{E}\left[ \left\vert \Delta_{\varphi }(Z_{1},h_{o})[v_{h}]\right\vert ^{2}\right] \leq\left\Vert \mathbb{E}\left[ \varepsilon^{2}R(x)R(x)^{\prime}\right] \right\Vert \leq C\left\Vert Q_{L}\right\Vert \leq C \label{P-L6-AP-1}% \end{equation} where the second inequality is by Assumption \ref{L-A-0}.(ii), the third inequality is by Assumption \ref{L-A-0}.(iv). 
Similarly, \begin{equation} \sup_{v_{g}\in\mathcal{W}_{2,n}}\mathbb{E}\left[ \left\vert \Delta_{\psi }(Z_{2},\alpha_{o})[v_{g}]\right\vert ^{2}\right] \leq\left\Vert \mathbb{E}\left[ u^{2}P(\varepsilon)P(\varepsilon)^{\prime}\right] \right\Vert \leq C\left\Vert Q_{K}\right\Vert \leq C \label{P-L6-AP-2}% \end{equation} where the second inequality is by Assumption \ref{L-A-1}.(i), the third inequality is by Assumption \ref{L-A-1}.(iv). By $v_{h_{n}}^{\ast}=0$, (\ref{P-LAP-1}), (\ref{P-L2-AP-2}) and (\ref{P-L3-AP-1}), \begin{equation} (\left\Vert v_{h_{n}}^{\ast}\right\Vert _{\varphi}+\left\Vert v_{\Gamma_{n}% }^{\ast}\right\Vert _{\varphi}+\left\Vert v_{g_{n}}^{\ast}\right\Vert _{\psi })\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-1}=\left\Vert u_{\Gamma_{n}% }^{\ast}\right\Vert _{2}+\left\Vert u_{g_{n}}^{\ast}\right\Vert _{2}\leq C, \label{P-L6-AP-3}% \end{equation} which verifies (4.10) in HLR. By (\ref{P-L6-AP-1}), (\ref{P-L6-AP-2}) and (\ref{P-L6-AP-3}), Assumption 4.3.(iv) in HLR is verified. \end{proof} \section{Verification of Assumptions 3.2 and\ 3.4 in Example 2.1 \label{SA-2}} In this section, we use the nonparametric triangular simultaneous equation model in Newey, Powell and Vella (1999) to illustrate the high-level sufficient conditions for the asymptotic normality of the two-step sieve estimator. The first step nonparametric estimation takes the following form:% \begin{equation} \widehat{h}_{n}=\arg\max_{h\in\mathcal{H}_{n}}-\frac{1}{2n}\sum\limits_{i=1}% ^{n}\left( x_{i}-h\left( w_{1,i}\right) \right) ^{2} \label{A_NPV_1}% \end{equation} where $\mathcal{H}_{n}=\{h:h\left( \cdot\right) =R(\cdot)^{\prime}\gamma$, $\gamma\in\mathbb{R}^{L(n)}\}$. 
Let $R(w_{1,i})=\left[ r_{1}(w_{1,i}% ),\ldots,r_{L(n)}(w_{1,i})\right] ^{\prime}$\ for $i=1,\ldots,n$, and\ $R_{n}=\left[ R(w_{1,1}),\ldots,R(w_{1,n})\right] $.\ The first step M estimator $\widehat{h}_{n}$ has a closed form expression% \begin{equation} \widehat{h}_{n}(\cdot)=R(\cdot)^{\prime}\left( R_{n}R_{n}^{\prime}\right) ^{-1}R_{n}X_{n}=R(\cdot)^{\prime}\widehat{\gamma}_{n} \label{A_NPV_2}% \end{equation} where\ $X_{n}=\left[ x_{1},\ldots,x_{n}\right] ^{\prime}$. To define the second step M estimation, let $P(w)=\left[ p_{1}(w),\ldots,p_{K(n)}% (w)\right] ^{\prime}$ be a vector of approximating functions of $w=(x,w_{2}^{\prime},u)^{\prime}$ such that each $p_{k}(w)$ depends on $(x,w_{2})$ or on $u$, but not both. From the first step estimator, we calculate $\widehat{u}_{i}=x_{i}-\widehat{h}_{n}(w_{1,i})$ for $i=1,\ldots,n$. Let\ $P(\widehat{w}_{i})=[p_{1}(\widehat{w}_{i}),\ldots,p_{K(n)}(\widehat {w}_{i})]^{\prime}$ and $\widehat{P}_{n}=[\widehat{\tau}_{1}P(\widehat{w}% _{1}),\ldots,\widehat{\tau}_{n}P(\widehat{w}_{n})]^{\prime}$, where $\widehat{w}_{i}=(x_{i},w_{2,i}^{\prime},\widehat{u}_{i})^{\prime}$ and $\widehat{\tau}_{i}=\prod\nolimits_{j=1}^{d_{w_{2}}+2}I\{a_{j}\leq\widehat {w}_{j,i}\leq b_{j}\}$\ for $i=1,\ldots,n$, where $d_{w_{2}}$ denotes the dimension of $w_{2}$ and $\widehat{w}_{j,i}$ is the $j$-th component of $\widehat{w}_{i}$ for $j=1,\ldots,d_{w_{2}}+2$. Let $g_{o}(w)=m_{o}% (x,w_{2})+\lambda_{o}\left( u\right) $ and $\eta=y-m_{o}(x,w_{2}% )-\lambda_{o}\left( u\right) $. By the definition of $\lambda_{o}\left( u\right) $, and the conditional moment restrictions in (3) of HLR, we have% \begin{equation} \mathbb{E}[\eta|x,w_{1}]=0. \label{A_NPV_3}% \end{equation} Let $\mathcal{T}_{w}=\{w:\tau(w)=1\}$ where $\tau(w)=\prod\nolimits_{j=1}% ^{d_{w_{2}}+2}I\{a_{j}\leq w_{j}\leq b_{j}\}$ and $w_{j}$ denotes the $j$-th component of $w$. 
The second step M estimator (of $g_{o}$) is% \begin{equation} \widehat{g}_{n}=\arg\max_{g\in\mathcal{G}_{n}}-n^{-1}\sum\limits_{i=1}% ^{n}\widehat{\tau}_{i}(y_{i}-g(\widehat{w}_{i}))^{2} \label{A_NPV_4}% \end{equation} where $\mathcal{G}_{n}=\{g\left( \cdot\right) :g\left( \cdot\right) =\tau(\cdot)P(\cdot)^{\prime}\beta$, $\beta\in\mathbb{R}^{K(n)}\}$. The second step M estimator $\widehat{g}_{n}$ also has a closed form expression \begin{equation} \widehat{g}_{n}(w)=P(w)^{\prime}(\widehat{P}_{n}^{\prime}\widehat{P}_{n}% )^{-1}\widehat{P}_{n}^{\prime}Y_{n}=P(w)^{\prime}\widehat{\beta}_{n} \label{A_NPV_5}% \end{equation} for any $w\in\mathcal{T}_{w}$, where\ $Y_{n}=\left[ y_{1},\ldots ,y_{n}\right] ^{\prime}$. The plug-in estimator of $\rho(g_{o})$ is $\rho(\widehat{g}_{n})$, where $\rho(\cdot)$ is a linear functional of $g$. We next list the low level sufficient conditions for the asymptotic normality of $\rho(\widehat{g}_{n})$. These assumptions are from Newey, Powell and Vella (1999). \begin{assumption} \label{AA-E-1} $\left\{ (y_{i},x_{i},w_{1,i})\right\} _{i=1}^{n}$ is i.i.d., $var(x|w_{1})$ and $var(y|x,w_{1})$ are bounded. \end{assumption} \begin{assumption} \label{AA-E-2} $w_{1}$ is continuously distributed with density that is bounded away from zero on its support, and the support of $w_{1}$ is a Cartesian product of compact, connected intervals. Also $w$ is continuously distributed and its density is bounded away from zero on $\mathcal{T}_{w}$, and $\mathcal{T}_{w}$\ is contained in the interior of the support of $w$. \end{assumption} \begin{assumption} \label{AA-E-3} $h_{o}\left( w_{1}\right) $ is continuously differentiable of order $s_{1}$ on the support of $w_{1}$ and $m_{o}(x,w_{2})$ and $\lambda _{o}(u)$ are Lipschitz and continuously differentiable of order $s$ on $\mathcal{T}_{w}$. \end{assumption} In the rest of the section, we write $L$ and $K$ for $L(n)$ and $K(n)$ respectively for notational simplicity. 
Following Newey, Powell and Vella (1999), we consider two types of approximating functions for $R(w_{1})$ and $P(w)$:\ the power series and splines. \begin{assumption} \label{AA-E-4} Either (a) for power series, $(K^{3}+K^{2}L)(L^{1/2}% n^{-1/2}+L^{-s_{1}/d_{w_{1}}})=o(1)$; or (b) for splines, $(K^{2}% +KL)(L^{1/2}n^{-1/2}+L^{-s_{1}/d_{w_{1}}})=o(1)$. \end{assumption} By Assumption \ref{AA-E-3}, there exists $\gamma_{o,L}\in\mathbb{R}^{L}$ such that% \begin{equation} \sup_{w_{1}\in\mathcal{W}_{1}}\left\vert h_{o,L}(w_{1})-h_{o}(w_{1}% )\right\vert \leq CL^{-s_{1}/d_{w_{1}}}, \label{AA-AP-1}% \end{equation} where $h_{o,L}(w_{1})=R(w_{1})^{\prime}\gamma_{o,L}$,\ $\mathcal{W}_{1}$ denotes the support of $w_{1}$ and $d_{w_{1}}$ denotes the dimension of $w_{1}$, and there exists $\beta_{o,K}\in\mathbb{R}^{K}$ such that% \begin{equation} \sup_{w\in\mathcal{T}_{w}}\left\vert g_{o,K}(w)-g_{o}(w)\right\vert \leq CK^{-s/d} \label{AA-AP-2}% \end{equation} where $g_{o,K}(w)=P(w)^{\prime}\beta_{o,K}$ and $d$ denotes the dimension of\ $\left( x,w_{2}^{\prime}\right) ^{\prime}$. We next calculate the Riesz representors $v_{g_{n}}^{\ast}$ and $v_{\Gamma _{n}}^{\ast}$. Let $Z_{1,i}=(x_{i},w_{1,i}^{\prime})^{\prime}$ and $\varphi\left( Z_{1,i},h\right) =-\left( x_{i}-h\left( w_{1,i}\right) \right) ^{2}/2$. By definition, $\left\langle v_{h_{1}},v_{h_{2}% }\right\rangle _{\varphi}=\mathbb{E}\left[ v_{h_{1}}(w_{1})v_{h_{2}}% (w_{1})\right] $ for any\ $v_{h_{1}},v_{h_{2}}\in\mathcal{V}_{1}$. Let $Z_{2,i}=(y_{i},x_{i},w_{1,i}^{\prime})^{\prime}$, $u_{h,i}=x_{i}-h(w_{1,i})$ and $w_{h,i}=(x_{i},w_{2,i}^{\prime},u_{h,i})^{\prime}$. The criterion function of the second step estimation is% \[ \psi\left( Z_{2,i},g,h\right) =-\tau(w_{h,i})\left( y_{i}-m\left( x_{i},w_{2,i}\right) -\lambda\left( x_{i}-h(w_{1,i})\right) \right) ^{2}/2. 
\] By definition, $\langle v_{g_{1}},v_{g_{2}}\rangle_{\psi}=\mathbb{E}\left[ \tau(w)v_{g_{1}}(w)v_{g_{2}}(w)\right] $\ for any $v_{g_{1}},v_{g_{2}}% \in\mathcal{V}_{2}$. By some simple calculation, we get% \[ v_{g_{n}}^{\ast}(\cdot)=\tau(\cdot)P(\cdot)^{\prime}Q_{K}^{-1}\rho(P_{K}), \] where $Q_{K}=\mathbb{E}\left[ \tau(w)P(w)P(w)^{\prime}\right] $ and $\rho(P_{K})=\left[ \rho(p_{1}),\ldots,\rho(p_{K})\right] ^{\prime}$. Moreover, by the conditional moment condition (\ref{A_NPV_3}), we have% \[ \Gamma(\alpha_{o})\left[ v_{h},v_{g}\right] =\mathbb{E}\left[ \tau(w)\partial_{u}g_{o}(w)v_{h}(w_{1})v_{g}(w)\right] \] where $\partial_{u}g_{o}(w)=\partial g_{o}(w)/\partial u$, which implies that% \[ v_{\Gamma_{n}}^{\ast}(\cdot)=R(\cdot)^{\prime}Q_{L}^{-1}\mathbb{E}\left[ \tau(w)\partial_{u}g_{o}(w)R(w_{1})v_{g_{n}}^{\ast}(w)\right] =R(\cdot)^{\prime}% Q_{L}^{-1}HQ_{K}^{-1}\rho(P_{K}), \] where $H=\mathbb{E}\left[ \tau(w)\partial_{u}g_{o}(w)R(w_{1})P(w)^{\prime }\right] $ and\ $Q_{L}=\mathbb{E}\left[ R(w_{1})R(w_{1})^{\prime}\right] $.\ Using the sieve Riesz representors $v_{g_{n}}^{\ast}$ and $v_{\Gamma_{n}% }^{\ast}$, and the i.i.d. assumption, we have% \begin{align} \left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2} & =\text{Var}\left[ n^{-\frac{1}{2}}\sum\nolimits_{i=1}^{n}\left( u_{i}v_{\Gamma_{n}}^{\ast }(w_{1,i})+\eta_{i}\tau(w_{i})v_{g_{n}}^{\ast}(w_{i})\right) \right] \nonumber\\ & =\mathbb{E}\left[ u^{2}(v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}\right] +\mathbb{E}\left[ \eta^{2}\tau(w)(v_{g_{n}}^{\ast}(w))^{2}\right] \label{V_n_EX2}% \end{align} where the second equality is by (\ref{A_NPV_3}). Let $\Sigma_{K}% =\mathbb{E}\left[ \eta^{2}\tau(w)P(w)P(w)^{\prime}\right] $ and $\Sigma _{L}=\mathbb{E}\left[ u^{2}R(w_{1})R(w_{1})^{\prime}\right] $. 
By the explicit expressions of $v_{g_{n}}^{\ast}$ and $v_{\Gamma_{n}}^{\ast}$,% \begin{align*} \left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2} & =\rho(P_{K})^{\prime}% Q_{K}^{-1}H^{\prime}Q_{L}^{-1}\mathbb{E}\left[ u^{2}R(w_{1})R(w_{1})^{\prime }\right] Q_{L}^{-1}HQ_{K}^{-1}\rho(P_{K})\\ & +\rho(P_{K})^{\prime}Q_{K}^{-1}\mathbb{E}\left[ \eta^{2}\tau (w)P(w)P(w)^{\prime}\right] Q_{K}^{-1}\rho(P_{K})\\ & =\rho(P_{K})^{\prime}Q_{K}^{-1}\left[ \Sigma_{K}+H^{\prime}Q_{L}% ^{-1}\Sigma_{L}Q_{L}^{-1}H\right] Q_{K}^{-1}\rho(P_{K}) \end{align*} which is the same as the variance-covariance matrix $V$ of the two-step estimator defined on page 596 of Newey, Powell and Vella (1999). \begin{assumption} \label{AA-E-5} $\sigma^{2}(x,w_{1})=var(y|x,w_{1})$ is bounded away from zero, $\mathbb{E}[\eta^{4}|x,w_{1}]$ is bounded, and $\mathbb{E}[u^{4}|x,w_{1}]$ is bounded. Also $g_{o}(w)$ is twice continuously differentiable in $u$ with bounded first and second derivatives. \end{assumption} \begin{assumption} \label{AA-E-6} There exist $v_{g}^{\ast}(w)$ and $\beta_{v,K}$ such that $\mathbb{E}[\tau(w)\left\vert v_{g}^{\ast}(w)\right\vert ^{2}]<\infty$, $\rho(g_{o})=\mathbb{E}[\tau(w)v_{g}^{\ast}(w)g_{o}(w)]$, $\rho(p_{k}% )=\mathbb{E}[\tau(w)v_{g}^{\ast}(w)p_{k}(w)]$ and $\mathbb{E}[\tau (w)\left\vert v_{g}^{\ast}(w)-P(w)^{\prime}\beta_{v,K}\right\vert ^{2}]\rightarrow0$ as $K\rightarrow\infty$. \end{assumption} For any $d_{w}\times1$ vector $a$ of nonnegative integers, let $|a|=\sum _{j=1}^{d_{w}}a_{j}$, $\partial^{a}g(w)=\partial^{|a|}g(w)/\partial w_{1}^{a_{1}}\cdots\partial w_{d_{w}}^{a_{d_{w}}}$. Let $\xi_{\delta,K}$ ($\delta=0,1$) and $\zeta_{L}$ be nondecreasing sequences such that $\max_{|a|\leq\delta}% \sup_{w\in\mathcal{T}_{w}}\left\Vert \partial^{a}P(w)\right\Vert \leq \xi_{\delta,K}$ and $\sup_{w_{1}\in\mathcal{W}_{1}}\left\Vert R(w_{1}% )\right\Vert \leq\zeta_{L}$ respectively. 
The following assumption is on the numbers of generic approximating functions in the first step and second step estimations.\ \begin{assumption} \label{AA-E-7} $n^{1/2}K^{-s/d}=o(1)$ and $n^{1/2}L^{-s_{1}/d_{w_{1}}}=o(1)$, and% \begin{equation} \xi_{0,K}^{2}(L^{2}+K^{2})\log(n)n^{-1}+\xi_{0,K}^{2}\zeta_{L}^{2}L(\zeta _{L}^{2}L+\xi_{0,K}^{2}K)n^{-1}+\xi_{1,K}^{2}LKn^{-1}=o(1). \label{AAE7-1}% \end{equation} \end{assumption} When the power series are used in the two-step estimation, we have $\zeta _{L}\leq CL$ and $\xi_{\delta,K}\leq CK^{1+2\delta}$ ($\delta=0,1$). Under the conditions that $n^{1/2}K^{-s/d}=o(1)$ and $n^{1/2}L^{-s_{1}/d_{w_{1}}}=o(1)$, the sufficient condition for (\ref{AAE7-1}) becomes% \[ (K^{7}L+K^{5}L^{3}+K^{2}L^{6})n^{-1}=o(1) \] which is implied by Assumption 8 in Newey, Powell and Vella (1999). When the splines are used in the two-step estimation, we have $\zeta_{L}\leq CL^{1/2}$ and $\xi_{\delta,K}\leq CK^{1/2+\delta}$ ($\delta=0,1$). Under the conditions that $n^{1/2}K^{-s/d}=o(1)$ and $n^{1/2}L^{-s_{1}/d_{w_{1}}}=o(1)$, the sufficient condition for (\ref{AAE7-1}) becomes% \[ (K^{4}L+K^{3}L^{2}+KL^{4})n^{-1}=o(1) \] which is also implied by Assumption 8 in Newey, Powell and Vella (1999). \begin{theorem} \label{AT-E-1}Under Assumptions \ref{AA-E-1}-\ref{AA-E-7}, we have% \begin{equation} \frac{\sqrt{n}\left[ \rho(\widehat{g}_{n})-\rho(g_{o})\right] }{\left\Vert v_{n}^{\ast}\right\Vert _{sd}}\rightarrow_{d}N(0,1). \label{AT-E1-1}% \end{equation} \end{theorem} \begin{proof} [Proof of Theorem \ref{AT-E-1}]Define $\delta_{h,n}=\delta_{h,n}^{\ast}% \varrho_{n}$ and $\delta_{g,n}=\delta_{g,n}^{\ast}\varrho_{n}$ where $\delta_{h,n}^{\ast}=L^{1/2}n^{-1/2}+L^{-s_{1}/d_{w_{1}}}$, $\delta _{g,n}^{\ast}=K^{1/2}n^{-1/2}+K^{-s/d}+\delta_{h,n}^{\ast}$ and $\{ \varrho_{n}\}_{n}$ is a slowly divergent real positive sequence. 
Let $\mathcal{N}_{\gamma,n}=\{ \gamma\in\mathbb{R}^{L}$: $||\gamma-\gamma _{o,L}||\leq\delta_{h,n}\}$ where $\delta_{h,n}=\delta_{h,n}^{\ast}\varrho _{n}$ and $\{ \varrho_{n}\}_{n}$ is a slowly divergent real positive sequence.\ Similarly, define $\mathcal{N}_{\beta,n}=\{ \beta\in\mathbb{R}^{K}% $: $||\beta-\beta_{o,K}||\leq\delta_{g,n}\}$ where $\delta_{g,n}=\delta _{g,n}^{\ast}\varrho_{n}$. By Lemma \ref{AL-E-1}.(b) and Lemma \ref{AL-E-1}% .(d), we have $\widehat{\gamma}_{n}\in\mathcal{N}_{\gamma,n}$ and $\widehat{\beta}_{n}\in\mathcal{N}_{\beta,n}$ wpa1. Define $\mathcal{N}% _{h,n}=\{h\left( \cdot\right) =R\left( \cdot\right) ^{\prime}\gamma$: $\gamma\in\mathcal{N}_{\gamma,n}\}$ and $\mathcal{N}_{g,n}=\{g\left( \cdot\right) =P\left( \cdot\right) ^{\prime}\beta$: $\beta\in \mathcal{N}_{\beta,n}\}$.\footnote{Let $\left\Vert h\right\Vert _{2}=(\mathbb{E}\left[ h(w_{1})^{2}\right] )^{1/2}$ denote the $L_{2}$-norm and $\left\Vert g\right\Vert _{2,\tau}=(\mathbb{E}\left[ \tau(w)g(w)^{2}% \right] )^{1/2}$ denote the restricted $L_{2}$-norm. One may also define the local neighborhoods of $h_{o}$ and $g_{o}$ as: $\mathcal{N}_{h,n}^{\prime }=\{h\left( \cdot\right) =R\left( \cdot\right) ^{\prime}\gamma$: $\left\Vert h-h_{o}\right\Vert _{2}\leq\delta_{h,L}\varrho_{n}^{\prime}\}$ and $\mathcal{N}_{g,n}^{\prime}=\{g\left( \cdot\right) =P\left( \cdot\right) ^{\prime}\beta$: $\left\Vert g-g_{o}\right\Vert _{2,\tau}\leq\delta _{g,L}\varrho_{n}^{\prime}\}$ respectively, where\ $\{ \varrho_{n}^{\prime }\}_{n}$ is a slowly divergent real sequence. 
For any $h=R\left( \cdot\right) ^{\prime}\gamma_{h}\in\mathcal{N}_{h,n}^{\prime}$, by the triangle inequality, \[ \left\Vert h-h_{o,n}\right\Vert \leq\left\Vert h-h_{o}\right\Vert +\left\Vert h_{o,n}-h_{o}\right\Vert \leq2\delta_{h,L}\varrho_{n}^{\prime}% \] which implies that $||\gamma_{h}-\gamma_{o,L}||\leq2\omega_{\min}^{-1}% (Q_{L})\delta_{h,L}\varrho_{n}^{\prime}$, where $\omega_{\min}(Q_{L})$ denotes the smallest eigenvalue of $Q_{L}$ which is bounded away from zero by Assumption \ref{AA-E-2}. Hence if we let $\varrho_{n}=2\omega_{\min}% ^{-1}(Q_{L})\varrho_{n}^{\prime}$, then $\gamma_{h}\in\mathcal{N}_{\gamma,n}$ which implies that $h\in\mathcal{N}_{h,n}$ and hence $\mathcal{N}% _{h,n}^{\prime}\subset\mathcal{N}_{h,n}$. Similarly, we can appropriately choose $\varrho_{n}$ such that $\mathcal{N}_{g,n}^{\prime}\subset \mathcal{N}_{g,n}$. This means the high-level sufficient conditions verified under $\mathcal{N}_{h,n}$ and/or $\mathcal{N}_{g,n}$ hold for their counterparts under $\mathcal{N}_{h,n}^{\prime}$ and/or $\mathcal{N}% _{g,n}^{\prime}$.}\ By Lemma \ref{AL-E-1}.(b) and Lemma \ref{AL-E-1}.(d), we have $\widehat{h}_{n}\in\mathcal{N}_{h,n}$ and $\widehat{g}_{n}\in \mathcal{N}_{g,n}$ wpa1. The proof of the theorem is divided into three steps. \textbf{Step 1.} We verify Assumption 3.1 in HLR. By Assumptions \ref{AA-E-5} and \ref{AA-E-6}, Lemma \ref{AL-E-0} implies that \begin{equation} \left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2}\rightarrow\mathbb{E}\left[ \eta ^{2}\tau(w)(v_{g}^{\ast}(w))^{2}\right] +\mathbb{E}\left[ u^{2}(v_{\Gamma }^{\ast}(w_{1}))^{2}\right] \label{P-AT-E1-1}% \end{equation} as $K\rightarrow\infty$ and $L\rightarrow\infty$, where $v_{\Gamma}^{\ast }(w_{1})=\mathbb{E}\left[ \tau(w)v_{g}^{\ast}(w)\partial_{u}g_{o}% (w)|w_{1}\right] $. The above limit is the same as the asymptotic variance defined in (5.7) of Newey, Powell and Vella (1999). 
By Assumption \ref{AA-E-5}, $\mathbb{E}\left[ \eta^{2}|x,w_{1}\right] >C_{\eta}$ where $C_{\eta}$ is a finite positive constant. This means that \begin{equation} \mathbb{E}\left[ \eta^{2}\tau(w)(v_{g}^{\ast}(w))^{2}\right] \geq C_{\eta }\mathbb{E}\left[ \tau(w)(v_{g}^{\ast}(w))^{2}\right] >0 \label{P-AT-E1-2}% \end{equation} where the last inequality is by the fact that $\rho(g_{o})$ is an unknown value. If $\mathbb{E}\left[ \tau(w)(v_{g}^{\ast}(w))^{2}\right] =0$, we have $\tau(w)(v_{g}^{\ast}(w))^{2}=0$ almost surely which together with (5.6) in Newey, Powell and Vella (1999) implies that $\rho(g)=0$ for any $g\in \mathcal{G}$, where $\mathcal{G}$ includes all additive functions satisfying Assumptions \ref{AA-E-3} and \ref{AA-E-5}. In such a case, $\rho(g_{o})$ would be known (and equal to zero). Combining the results in (\ref{P-AT-E1-1}) and (\ref{P-AT-E1-2}) we have $\liminf_{n}\left\Vert v_{n}^{\ast}\right\Vert _{sd}>0$, which verifies Assumption 3.1.(i). Because $\rho(\cdot)$ is a linear functional and $\left\Vert v_{n}^{\ast}\right\Vert _{sd}$ is bounded away from zero, Assumption 3.1.(ii) holds trivially. The strong norms $\left\Vert \cdot\right\Vert _{\mathcal{H}}$ and $\left\Vert \cdot\right\Vert _{\mathcal{G}}$ used to establish the convergence rate of $\widehat{h}_{n}$ and $\widehat{g}_{n}$ respectively are the $L_{2}$-norm $\left\Vert h\right\Vert _{2}=(\mathbb{E}\left[ (h(w_{1}))^{2}\right] )^{1/2}$ and the restricted $L_{2}$-norm $\left\Vert g\right\Vert _{2,\tau}=(\mathbb{E}\left[ \tau(w)(g(w))^{2}\right] )^{1/2}$ respectively (see footnote 1 for details). 
By the definitions of $\left\Vert \cdot\right\Vert _{\varphi}$ and $\left\Vert \cdot\right\Vert _{\psi}$,\ we can set $c_{\varphi}=1$ and $c_{\psi}=1$ such that $\left\Vert v_{h}\right\Vert _{\varphi}\leq c_{\varphi}\left\Vert v_{h}\right\Vert _{\mathcal{H}}$ and $\left\Vert v_{g}\right\Vert _{\psi}\leq c_{\psi}\left\Vert v_{g}\right\Vert _{\mathcal{G}}$ for any $v_{h}% \in\mathcal{V}_{1}$ and $v_{g}\in\mathcal{V}_{2}$. Under Assumptions \ref{AA-E-1}-\ref{AA-E-4}, we can use Lemma 4.1 of Newey, Powell and Vella (1999) to get% \begin{equation} \left\Vert \widehat{g}_{n}-g_{o}\right\Vert _{\mathcal{G}}=O_{p}(\delta _{2,n}^{\ast}) \label{P-AT-E1-3}% \end{equation} where $\delta_{2,n}^{\ast}=K^{1/2}n^{-1/2}+K^{-s/d}+L^{1/2}n^{-1/2}% +L^{-s_{1}/d_{w_{1}}}$. Let $g_{n}\left( \cdot\right) =g_{o,K}\left( \cdot\right) $ where $g_{o,K}$ is defined in (\ref{AA-AP-2}). Then by (\ref{AA-AP-2}) we have% \begin{equation} \left\Vert g_{n}-g_{o}\right\Vert _{\mathcal{G}}=\left\Vert g_{o,K}% -g_{o}\right\Vert _{\mathcal{G}}\leq\sup_{w\in\mathcal{T}_{w}}\left\vert g_{o,K}(w)-g_{o}(w)\right\vert =O(\delta_{2,n}^{\ast}), \label{P-AT-E1-4}% \end{equation} which finishes verification of Assumption 3.1.(iii). 
For Assumption 3.1.(iv), as $\rho(g)$ is linear and it only depends on $g$, it is sufficient to show that \begin{equation} \frac{1}{\left\Vert v_{n}^{\ast}\right\Vert _{sd}}\left\vert \rho (g_{o,n}-g_{o})\right\vert =o(n^{-\frac{1}{2}}) \label{P-AT-E1-5}% \end{equation} where $g_{o,n}$ denotes the projection of $g_{o}$ on the finite dimensional sieve space with respect to the restricted $L_{2}$-norm $\left\Vert \cdot\right\Vert _{2,\tau}$.\ By (5.6) in Newey, Powell and Vella (1999), \begin{align} \left\vert \rho(g_{o,n}-g_{o})\right\vert ^{2} & =\left\vert \mathbb{E}% \left[ \tau(w)v_{g}^{\ast}(w)(g_{o,n}(w)-g_{o}(w))\right] \right\vert ^{2}\nonumber\\ & \leq\mathbb{E}\left[ \tau(w)(v_{g}^{\ast}(w))^{2}\right] \mathbb{E}% \left[ \tau(w)(g_{o,n}(w)-g_{o}(w))^{2}\right] \nonumber\\ & \leq\mathbb{E}\left[ \tau(w)(v_{g}^{\ast}(w))^{2}\right] \mathbb{E}% \left[ \tau(w)(g_{o,K}(w)-g_{o}(w))^{2}\right] =O(K^{-2s/d}) \label{P-AT-E1-6}% \end{align} where the first inequality is by H\"{o}lder's inequality, the second inequality is by the definition of $g_{o,n}$, the last equality is by (\ref{AA-AP-2}) and Assumption \ref{AA-E-6}. By Assumption 3.1.(i) (which has already been verified), (\ref{P-AT-E1-6}) and Assumption \ref{AA-E-7}, we prove (\ref{P-AT-E1-5}) and hence Assumption 3.1.(iv). \textbf{Step 2.} We verify Assumption 3.2 of HLR. Let $u_{h}=x-h(w_{1})$ and $w_{h}=(x,w_{2}^{\prime},u_{h})^{\prime}$. 
By definition% \begin{align} & \psi(Z_{2},g^{\ast},h)-\psi(Z_{2},g,h)-\Delta_{\psi}(Z_{2},g,h)[\pm \kappa_{n}u_{g_{n}}^{\ast}]\nonumber\\ & =-\frac{\tau(w_{h})(y-g(w_{h})\mp\kappa_{n}u_{g_{n}}^{\ast}(w))^{2}}% {2}\nonumber\\ & +\frac{\tau(w_{h})(y-g(w_{h}))^{2}}{2}-\tau(w_{h})(y-g(w_{h}))(\pm \kappa_{n}u_{g_{n}}^{\ast})\nonumber\\ & =-\frac{\kappa_{n}^{2}}{2}\tau(w_{h})(u_{g_{n}}^{\ast}(w))^{2}, \label{P-AT-E1-7}% \end{align} where $u_{g_{n}}^{\ast}(w)=v_{g_{n}}^{\ast}(w)/\left\Vert v_{n}^{\ast }\right\Vert _{sd}$ and\ $\left\Vert v_{n}^{\ast}\right\Vert _{sd}$ is defined in (\ref{V_n_EX2}). By the triangle inequality, Lemma \ref{AL-E-2}.(e)-(f) and (\ref{P-AT-E1-7}), \begin{align} & \sup_{h\in\mathcal{N}_{h,n},g\in\mathcal{N}_{g,n}}\left\vert \mu _{n}\left\{ \psi(Z_{2},g^{\ast},h)-\psi(Z_{2},g,h)-\Delta_{\psi}% (Z_{2},g,h)[\pm\kappa_{n}u_{g_{n}}^{\ast}]\right\} \right\vert \nonumber\\ & \leq\frac{\kappa_{n}^{2}}{2}n^{-1}\sum_{i=1}^{n}(u_{g_{n}}^{\ast}% (w_{i})^{2}+\mathbb{E}[u_{g_{n}}^{\ast}(w)^{2}])=O_{p}(\kappa_{n}^{2}) \label{P-AT-E1-8}% \end{align} which verifies the first condition (12) of Assumption 3.2.(i) in HLR. Instead of verifying (13) of Assumption 3.2.(i) in HLR, we show that Assumption 3.4\ holds. Assumption 3.4.(i) is implied by Assumption \ref{AA-E-1}. Let $\tau(Z_{1},h)=\tau(w_{h})$ and $\Delta_{\psi}^{\ast}(Z_{2},g,h)[u_{g_{n}% }^{\ast}]=(y-g(w_{h}))u_{g_{n}}^{\ast}(w)$. By definition, \begin{equation} \Delta_{\psi}(Z_{2},g,h)[u_{g_{n}}^{\ast}]=\tau(w_{h})(y-g(w_{h}))u_{g_{n}% }^{\ast}(w)=\tau(Z_{1},h)\Delta_{\psi}^{\ast}(Z_{2},g,h)[u_{g_{n}}^{\ast}]. \label{P-AT-E1-9}% \end{equation} Therefore equation (18) of HLR holds. By definition,\ $\tau(w_{h})$ and $u_{g_{n}}^{\ast}(w)$ only depend on $(x,w_{1})$. By (\ref{A_NPV_3}), \begin{equation} \mathbb{E}\left[ \left. \Delta_{\psi}^{\ast}(Z_{2},g_{o},h_{o})[u_{g_{n}% }^{\ast}]\right\vert Z_{1}\right] =\mathbb{E}\left[ \left. 
(y-g_{o}% (w))u_{g_{n}}^{\ast}(w)\right\vert x,w_{1}\right] =u_{g_{n}}^{\ast }(w)\mathbb{E}\left[ \left. \eta\right\vert x,w_{1}\right] =0 \label{P-AT-E1-10}% \end{equation} which verifies (19) of HLR. By (\ref{P-AT-E1-9}) and (\ref{P-AT-E1-10}) we show that Assumption 3.4.(ii) of HLR holds. By definition, \begin{equation} \Delta_{\psi}(Z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},g_{o}% ,h)[u_{g_{n}}^{\ast}]=\tau(w_{h})(g_{o}(w_{h})-g(w_{h}))u_{g_{n}}^{\ast}(w), \label{P-AT-E1-11}% \end{equation} and \begin{equation} \tau(Z_{1},h)(\Delta_{\psi}^{\ast}(Z_{2},g_{o},h)[u_{g_{n}}^{\ast}% ]-\Delta_{\psi}^{\ast}(Z_{2},g_{o},h_{o})[u_{g_{n}}^{\ast}])=\tau(w_{h}% )(g_{o}(w)-g_{o}(w_{h}))u_{g_{n}}^{\ast}(w). \label{P-AT-E1-12}% \end{equation} Hence Assumption 3.4.(iii) follows by Lemmas \ref{AL-E-5} and \ref{AL-E-6}. By Assumption \ref{AA-E-5} and Lemma \ref{AL-E-2}.(f) we have for any $h$ \begin{align} & (\tau(Z_{1},h)-\tau(Z_{1},h_{o}))^{2}\mathbb{E}\left[ \left. (\Delta_{\psi}^{\ast}(Z_{2},g_{o},h_{o})[u_{g_{n}}^{\ast}])^{2}\right\vert Z_{1}\right] \nonumber\\ & =(\tau(w_{h})-\tau(w))^{2}(u_{g_{n}}^{\ast}(w))^{2}\mathbb{E}\left[ \left. \eta^{2}\right\vert Z_{1}\right] \leq C\xi_{0,K}^{2}(\tau(w_{h})-\tau (w))^{2}, \label{P-AT-E1-13}% \end{align} which together with Lemma \ref{AL-E-2}.(d) implies that% \begin{align} & \sup_{h\in\mathcal{N}_{h,n}}n^{-1}\sum_{i=1}^{n}(\tau(Z_{1,i}% ,h)-\tau(Z_{1,i},h_{o}))^{2}\mathbb{E}\left[ \left. (\Delta_{\psi}^{\ast }(Z_{2,i},g_{o},h_{o})[u_{g_{n}}^{\ast}])^{2}\right\vert Z_{1,i}\right] \nonumber\\ & \leq C\xi_{0,K}^{2}\sup_{h\in\mathcal{N}_{h,n}}n^{-1}\sum_{i=1}^{n}% (\tau(Z_{1,i},h)-\tau(Z_{1,i},h_{o}))^{2}=O_{p}(\xi_{0,K}^{2}\zeta_{L}% \delta_{h,n}) \label{P-AT-E1-14}% \end{align} where $\xi_{0,K}^{2}\zeta_{L}\delta_{h,n}=o(1)$ by Assumption \ref{AA-E-7}. This proves Assumption 3.4.(iv) and hence finishes verification of Assumption 3.4. We next verify Assumption 3.2.(ii) of HLR. 
By definition, \begin{equation} \psi(Z_{2},g,h)-\psi(Z_{2},g^{\ast},h)=\tau(w_{h})(y-g(w_{h}))(\mp\kappa _{n}u_{g_{n}}^{\ast})+\frac{\kappa_{n}^{2}}{2}\tau(w_{h})(u_{g_{n}}^{\ast }(w))^{2}, \label{P-AT-E1-15}% \end{equation} which together with Lemma \ref{AL-E-2}.(e) and the definition of $K_{\psi }(g,h)$ implies that \begin{equation} K_{\psi}(g,h)-K_{\psi}(g^{\ast},h)=\mp\kappa_{n}\mathbb{E}\left[ \tau (w_{h})(y-g(w_{h}))u_{g_{n}}^{\ast}(w)\right] +O(\kappa_{n}^{2}). \label{P-AT-E1-16}% \end{equation} By (\ref{A_NPV_3}), \begin{equation} \mathbb{E}\left[ (\tau(w_{h})-\tau(w))(y-g_{o}(w))u_{g_{n}}^{\ast}(w)\right] =0 \label{P-AT-E1-17}% \end{equation} which implies that \begin{align} & \mathbb{E}\left[ \tau(w_{h})(y-g(w_{h}))u_{g_{n}}^{\ast}(w)\right] \nonumber\\ & =\mathbb{E}\left[ \tau(w_{h})(g_{o}(w)-g_{o}(w_{h}))u_{g_{n}}^{\ast }(w)\right] \nonumber\\ & +\mathbb{E}\left[ \tau(w_{h})(g_{o}(w_{h})-g(w_{h}))u_{g_{n}}^{\ast }(w)\right] . \label{P-AT-E1-18}% \end{align} Using the second order expansion in (\ref{AL-E5-1}),% \begin{align} & \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mathbb{E}\left[ \tau(w_{h}% )(g_{o}(w)-g_{o}(w_{h})-\partial_{u}g_{o}(w)(h(w_{1})-h_{o}(w_{1})))u_{g_{n}% }^{\ast}(w)\right] \right\vert \nonumber\\ & \leq C\sup_{w}|u_{g_{n}}^{\ast}(w)|\sup_{h\in\mathcal{N}_{h,n}}% \mathbb{E}\left[ (h(w)-h_{o}(w))^{2}\right] \leq C\xi_{0,K}\delta_{h,n}% ^{2}=o_{p}(n^{-1/2}), \label{P-AT-E1-19}% \end{align} where the second inequality is by Lemma \ref{AL-E-2}.(b) and \ref{AL-E-2}.(f), the equality is by Assumption \ref{AA-E-7}. 
By Assumption \ref{AA-E-5}, (\ref{AA-AP-1}), Lemma \ref{AL-E-1}.(c) and \ref{AL-E-2}.(g) and the definition of $\mathcal{N}_{h,n}$, \begin{align} & \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mathbb{E}\left[ (\tau(w_{h}% )-\tau(w))\partial_{u}g_{o}(w)(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast }(w)\right] \right\vert \nonumber\\ & \leq C\sup_{w}\left\vert u_{g_{n}}^{\ast}(w)\right\vert \left( \sup _{h\in\mathcal{N}_{h,n}}\sup_{w_{1}}\left\vert h(w_{1})-h_{o}(w_{1}% )\right\vert \right) \sup_{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ \left\vert \tau(w_{h})-\tau(w)\right\vert \right] \nonumber\\ & \leq C\xi_{0,K}\zeta_{L}\delta_{h,n}\left( \zeta_{L}\sup_{\gamma \in\mathcal{N}_{\gamma,n}}\left\Vert \gamma-\gamma_{o,L}\right\Vert +CL^{-s_{1}/d_{w_{1}}}\right) \nonumber\\ & \leq C\xi_{0,K}\zeta_{L}^{2}\delta_{h,n}^{2}=o_{p}(n^{-1/2}), \label{P-AT-E1-20}% \end{align} where the equality is by Assumption \ref{AA-E-7}. By (\ref{P-AT-E1-19}), (\ref{P-AT-E1-20}) and the triangle inequality,% \begin{equation} \mathbb{E}\left[ \tau(w_{h})(g_{o}(w)-g_{o}(w_{h}))u_{g_{n}}^{\ast }(w)\right] =\mathbb{E}\left[ \tau(w)\partial_{u}g_{o}(w)(h(w_{1}% )-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w)\right] +o_{p}(n^{-1/2}), \label{P-AT-E1-21}% \end{equation} uniformly over $(h,g)\in\mathcal{N}_{n}$. 
By (\ref{AL-E6-2}) in the proof of Lemma \ref{AL-E-6}, \begin{align} & \sup_{h\in\mathcal{N}_{h,n},g\in\mathcal{N}_{g,n}}\mathbb{E}\left[ \left\vert \tau(w_{h})(g_{o}(w_{h})-g(w_{h})-g_{o}(w)+g(w))u_{g_{n}}^{\ast }(w)\right\vert \right] \nonumber\\ & \leq\xi_{1,K}\sup_{\beta\in\mathcal{N}_{\beta,n}}\left\Vert \beta -\beta_{o,K}\right\Vert \sup_{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ \left\vert u_{g_{n}}^{\ast}(w)(h(w_{1})-h_{o}(w_{1}))\right\vert \right] \nonumber\\ & \leq\xi_{1,K}\delta_{g,n}\left\Vert u_{g_{n}}^{\ast}\right\Vert _{2}% \sup_{h\in\mathcal{N}_{h,n}}\left\Vert h-h_{o}\right\Vert _{2}\nonumber\\ & \leq\xi_{1,K}\delta_{g,n}\delta_{h,n}=o_{p}(n^{-1/2}), \label{P-AT-E1-22}% \end{align} where the second inequality is by H\"{o}lder's inequality and the definition of $\mathcal{N}_{\beta,n}$, the third inequality is by Lemma \ref{AL-E-2}.(e) and the definition of $\mathcal{N}_{h,n}$, the equality is by Assumption \ref{AA-E-7}. Similarly by (\ref{AA-AP-2}), Lemma \ref{AL-E-2}.(b), \ref{AL-E-2}.(c) and \ref{AL-E-2}.(g), \begin{align} & \sup_{h\in\mathcal{N}_{h,n},g\in\mathcal{N}_{g,n}}\mathbb{E}\left[ \left\vert (\tau(w_{h})-\tau(w))(g_{o}(w)-g(w))u_{g_{n}}^{\ast}(w)\right\vert \right] \nonumber\\ & \leq\sup_{w}\left\vert u_{g_{n}}^{\ast}(w)\right\vert \sup_{g\in \mathcal{N}_{g,n}}\sup_{w}\left\vert g(w)-g_{o}(w)\right\vert \sup _{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ \left\vert \tau(w_{h})-\tau (w)\right\vert \right] \nonumber\\ & \leq C\xi_{0,K}\zeta_{L}\delta_{h,n}\sup_{\beta\in\mathcal{N}_{\beta,n}% }\left[ \xi_{0,K}\left\Vert \beta-\beta_{o,K}\right\Vert +CK^{-s/d}\right] \nonumber\\ & \leq C\xi_{0,K}^{2}\zeta_{L}\delta_{g,n}\delta_{h,n}=o_{p}(n^{-1/2}), \label{P-AT-E1-23}% \end{align} where the equality is by Assumption \ref{AA-E-7}. 
By (\ref{P-AT-E1-22}), (\ref{P-AT-E1-23}) and the triangle inequality,% \begin{equation} \mathbb{E}\left[ \tau(w_{h})(g_{o}(w_{h})-g(w_{h}))u_{g_{n}}^{\ast }(w)\right] =\mathbb{E}\left[ \tau(w)(g_{o}(w)-g(w))u_{g_{n}}^{\ast }(w)\right] +o_{p}(n^{-1/2}), \label{P-AT-E1-24}% \end{equation} uniformly over $(h,g)\in\mathcal{N}_{n}$. Collecting the results in (\ref{P-AT-E1-16}), (\ref{P-AT-E1-18}), (\ref{P-AT-E1-21}) and (\ref{P-AT-E1-24}), we deduce that% \begin{align} K_{\psi}(g,h)-K_{\psi}(g^{\ast},h) & =\mathbb{E}\left[ \tau(w_{h}% )(y-g(w_{h}))(\mp\kappa_{n}u_{g_{n}}^{\ast}(w))\right] \nonumber\\ & =\mathbb{E}\left[ \tau(w)\partial_{u}g_{o}(w)(h(w_{1})-h_{o}(w_{1}% ))(\mp\kappa_{n}u_{g_{n}}^{\ast}(w))\right] \nonumber\\ & +\mathbb{E}\left[ \tau(w)(g_{o}(w)-g(w))(\mp\kappa_{n}u_{g_{n}}^{\ast }(w))\right] +o_{p}(n^{-1/2}) \label{P-AT-E1-25}% \end{align} uniformly over $(h,g)\in\mathcal{N}_{n}$. By definition,% \begin{equation} \Gamma(\alpha_{o})\left[ h-h_{o},u_{g_{n}}^{\ast}\right] =\mathbb{E}\left[ \tau(w)\partial_{u}g_{o}(w)(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w)\right] , \label{P-AT-E1-26}% \end{equation} for any $h\in\mathcal{N}_{h,n}$.\ By Jensen's inequality, (\ref{AA-AP-1}), Assumptions \ref{AA-E-5}, \ref{AA-E-7} and the definition of $h_{o,n}$,% \begin{align} \left\vert \Gamma(\alpha_{o})\left[ h_{o,n}-h_{o},u_{g_{n}}^{\ast}\right] \right\vert & \leq\mathbb{E}\left[ \left\vert \tau(w)\partial_{u}% g_{o}(w)(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w)\right\vert \right] \nonumber\\ & \leq C(\mathbb{E}[\left\vert h(w_{1})-h_{o}(w_{1})\right\vert ^{2}% ])^{1/2}=o(n^{-1/2}). 
\label{P-AT-E1-26a}% \end{align} Moreover, \begin{align} & \frac{||g^{\ast}-g_{o}||_{\psi}^{2}-||g-g_{o}||_{\psi}^{2}}{2}\nonumber\\ & =\mathbb{E}\left[ (g(w)-g_{o}(w))(\mp\kappa_{n}u_{g_{n}}^{\ast }(w))\right] +\frac{\kappa_{n}^{2}}{2}\mathbb{E}\left[ u_{g_{n}}^{\ast }(w)^{2}\right] \nonumber\\ & =\mathbb{E}\left[ (g(w)-g_{o}(w))(\mp\kappa_{n}u_{g_{n}}^{\ast }(w))\right] +O_{p}(\kappa_{n}^{2}) \label{P-AT-E1-27}% \end{align} uniformly over $g\in\mathcal{N}_{g,n}$, where the second equality is by Lemma \ref{AL-E-2}.(e). Collecting the results in (\ref{P-AT-E1-25}), (\ref{P-AT-E1-26}), (\ref{P-AT-E1-26a}) and (\ref{P-AT-E1-27}) proves Assumption 3.2(ii). \textbf{Step 3.} We verify Assumption 3.3 of HLR. As $\rho(g)$ does not depend on $h$, we only need to show that \begin{equation} \left\vert \langle\widehat{h}_{n}-h_{o},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi }-\mu_{n}\left\{ \Delta_{\varphi}(Z_{1},h_{o})[u_{\Gamma_{n}}^{\ast }]\right\} \right\vert =O_{p}(\kappa_{n}). \label{P-AT-E1-28}% \end{equation} By definition% \begin{equation} \langle\widehat{h}_{n}-h_{o},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi}% =\langle\widehat{h}_{n}-h_{o,L},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi}+\langle h_{o,L}-h_{o},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi}. \label{P-AT-E1-29}% \end{equation} By H\"{o}lder's inequality, (\ref{AA-AP-1}), Lemma \ref{AL-E-2}.(h) and Assumption \ref{AA-E-7},% \begin{equation} \left\vert \langle h_{o,L}-h_{o},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi }\right\vert \leq\left\Vert u_{\Gamma_{n}}^{\ast}\right\Vert \left\Vert h_{o,L}-h_{o}\right\Vert =o(n^{-1/2}). 
\label{P-AT-E1-30}% \end{equation} By definition, \begin{equation} \widehat{h}_{n}(w_{1})-h_{o,L}(w_{1})=R(w_{1})^{\prime}\left( R_{n}% R_{n}^{\prime}\right) ^{-1}R_{n}(H_{n}-H_{L,n})+R(w_{1})^{\prime}\left( R_{n}R_{n}^{\prime}\right) ^{-1}R_{n}U_{n} \label{P-AT-E1-31}% \end{equation} where $H_{n}=(h_{o}(w_{1,1}),\ldots,h_{o}(w_{1,n}))^{\prime}$, $H_{L,n}% =(h_{o,L}(w_{1,1}),\ldots,h_{o,L}(w_{1,n}))^{\prime}$ and $U_{n}=(u_{1}% ,\ldots,u_{n})^{\prime}$. By definition% \begin{align} \langle\widehat{h}_{n}-h_{o,L},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi} & =\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-1}\rho(P_{K})^{\prime}Q_{K}% ^{-1}H^{\prime}(R_{n}R_{n}^{\prime})^{-1}R_{n}U_{n}\nonumber\\ & +\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-1}\rho(P_{K})^{\prime}% Q_{K}^{-1}H^{\prime}(R_{n}R_{n}^{\prime})^{-1}R_{n}(H_{n}-H_{L,n}). \label{P-AT-E1-32}% \end{align} By the Cauchy-Schwarz inequality, \begin{align} & \left\vert \left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-1}\rho(P_{K}% )^{\prime}Q_{K}^{-1}H^{\prime}(R_{n}R_{n}^{\prime})^{-1}R_{n}(H_{n}% -H_{L,n})\right\vert ^{2}\nonumber\\ & =\left\vert n^{-1}\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-1}\rho (P_{K})^{\prime}Q_{K}^{-1}H^{\prime}(\widehat{Q}_{n,L})^{-1}R_{n}% (H_{n}-H_{L,n})\right\vert ^{2}\nonumber\\ & \leq\frac{\rho(P_{K})^{\prime}Q_{K}^{-1}H^{\prime}(\widehat{Q}_{n,L}% )^{-1}HQ_{K}^{-1}\rho(P_{K})}{\left\Vert v_{n}^{\ast}\right\Vert _{sd}% }\nonumber\\ & \times\frac{(H_{n}-H_{L,n})^{\prime}R_{n}^{\prime}(R_{n}R_{n}^{\prime })^{-1}R_{n}(H_{n}-H_{L,n})}{n}\nonumber\\ & \leq\omega_{\min}^{-1}(\widehat{Q}_{n,L})\omega_{\max}(Q_{L})\sup_{w_{1}% }\left\vert h_{o}(w_{1})-h_{o,L}(w_{1})\right\vert \frac{\rho(P_{K})^{\prime }Q_{K}^{-1}H^{\prime}Q_{L}^{-1}HQ_{K}^{-1}\rho(P_{K})}{\left\Vert v_{n}^{\ast }\right\Vert _{sd}}\nonumber\\ & \leq C\omega_{\min}^{-1}(\widehat{Q}_{n,L})\omega_{\max}(Q_{L}% )L^{-2s_{1}/d_{w_{1}}}\frac{\mathbb{E}\left[ \left\vert v_{\Gamma_{n}}^{\ast }(w)\right\vert ^{2}\right] }{\left\Vert 
v_{n}^{\ast}\right\Vert _{sd}}% =o_{p}(n^{-1}) \label{P-AT-E1-33}% \end{align} where $\widehat{Q}_{n,L}=n^{-1}R_{n}R_{n}^{\prime}$,\ the second inequality is by the fact that $R_{n}^{\prime}(R_{n}R_{n}^{\prime})^{-1}R_{n}$ is an idempotent matrix, the third inequality is by (\ref{AA-AP-1}) and the definition of $v_{\Gamma_{n}}^{\ast}$, the last equality is by Lemma \ref{AL-E-1}.(a), \ref{AL-E-2}.(h) and Assumption \ref{AA-E-7}.\ Hence we have \begin{equation} \left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-1}\rho(P_{K})^{\prime}Q_{K}% ^{-1}H^{\prime}(R_{n}R_{n}^{\prime})^{-1}R_{n}(H_{n}-H_{L,n})=o_{p}(n^{-1/2}). \label{P-AT-E1-34}% \end{equation} By the i.i.d. assumption, Assumption \ref{AA-E-5} and Lemma \ref{AL-E-1}.(a), \begin{equation} \mathbb{E}\left[ \left. \left\Vert n^{-1}Q_{L}^{-1}R_{n}U_{n}\right\Vert ^{2}\right\vert \{w_{1,i}\}_{i=1}^{n}\right] \leq\mathbb{E}\left[ \left. u^{2}\right\vert w_{1}\right] n^{-1}tr(Q_{L}^{-1}\widehat{Q}_{n,L}% )=O_{p}(n^{-1}), \label{P-AT-E1-35}% \end{equation} which together with the Markov inequality implies that \begin{equation} \left\Vert n^{-1}Q_{L}^{-1}R_{n}U_{n}\right\Vert =O_{p}(n^{-1/2}). \label{P-AT-E1-36}% \end{equation} By the definition of $v_{\Gamma_{n}}^{\ast}$, Assumption \ref{AA-E-5}, Lemma \ref{AL-E-1}.(a) and \ref{AL-E-2}.(h), \begin{align} & \left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-2}\rho(P_{K})^{\prime}Q_{K}% ^{-1}H^{\prime}(\widehat{Q}_{n,L})^{-2}HQ_{K}^{-1}\rho(P_{K})\nonumber\\ & \leq\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-2}\omega_{\min}% ^{-2}(\widehat{Q}_{n,L})\omega_{\max}(Q_{L})\rho(P_{K})^{\prime}Q_{K}% ^{-1}H^{\prime}Q_{L}^{-1}HQ_{K}^{-1}\rho(P_{K})\nonumber\\ & \leq\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-2}\omega_{\min}% ^{-2}(\widehat{Q}_{n,L})\omega_{\max}(Q_{L})\mathbb{E}\left[ (v_{\Gamma_{n}% }^{\ast}(w_{1}))^{2}\right] =O_{p}(1). 
\label{P-AT-E1-37}% \end{align} By Lemma \ref{AL-E-1}.(a), (\ref{P-AT-E1-36}), (\ref{P-AT-E1-37}) and the Cauchy-Schwarz inequality% \begin{align} & \left\vert (n\left\Vert v_{n}^{\ast}\right\Vert _{sd})^{-1}\rho (P_{K})^{\prime}Q_{K}^{-1}H^{\prime}((\widehat{Q}_{n,L})^{-1}-Q_{L}^{-1}% )R_{n}U_{n}\right\vert \nonumber\\ & =\left\vert (n\left\Vert v_{n}^{\ast}\right\Vert _{sd})^{-1}\rho (P_{K})^{\prime}Q_{K}^{-1}H^{\prime}(\widehat{Q}_{n,L})^{-1}(\widehat{Q}% _{n,L}-Q_{L})Q_{L}^{-1}R_{n}U_{n}\right\vert \nonumber\\ & \leq\left\Vert \left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-1}\rho (P_{K})^{\prime}Q_{K}^{-1}H^{\prime}(\widehat{Q}_{n,L})^{-1}\right\Vert \left\Vert \widehat{Q}_{n,L}-Q_{L}\right\Vert \left\Vert n^{-1}Q_{L}^{-1}% R_{n}U_{n}\right\Vert \nonumber\\ & =O_{p}(\zeta_{L}L^{1/2}n^{-1})=o_{p}(n^{-1/2}) \label{P-AT-E1-38}% \end{align} where the last equality is by Assumption \ref{AA-E-7}. By (\ref{P-AT-E1-29}), (\ref{P-AT-E1-30}), (\ref{P-AT-E1-32}), (\ref{P-AT-E1-33}) and (\ref{P-AT-E1-38}), \begin{align} \langle\widehat{h}_{n}-h_{o},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi} & =(n\left\Vert v_{n}^{\ast}\right\Vert _{sd})^{-1}\rho(P_{K})^{\prime}% Q_{K}^{-1}H^{\prime}Q_{L}^{-1}R_{n}U_{n}+o_{p}(n^{-1/2})\nonumber\\ & =(n\left\Vert v_{n}^{\ast}\right\Vert _{sd})^{-1}\sum_{i=1}^{n}% v_{\Gamma_{n}}^{\ast}(w_{1,i})u_{i}+o_{p}(n^{-1/2})\nonumber\\ & =\mu_{n}\left\{ \Delta_{\varphi}(Z_{1},h_{o})[u_{\Gamma_{n}}^{\ast }]\right\} +o_{p}(n^{-1/2}) \label{P-AT-E1-39}% \end{align} where the second equality is by the definition of $v_{\Gamma_{n}}^{\ast}$, and the third equality is by the definition of $\Delta_{\varphi}(Z_{1}% ,h_{o})[u_{\Gamma_{n}}^{\ast}]$. This verifies Assumption 3.3.(i) in HLR. 
To verify Assumption 3.3.(ii) in HLR, we notice that \begin{align} & n^{-\frac{1}{2}}\sum_{i=1}^{n}\left\{ \Delta_{\varphi}(Z_{1,i}% ,h_{o})[u_{\Gamma_{n}}^{\ast}]+\Delta_{\psi}(Z_{2,i},g_{o},h_{o})[u_{g_{n}% }^{\ast}]\right\} \nonumber\\ & =n^{-\frac{1}{2}}\sum_{i=1}^{n}\left\{ u_{\Gamma_{n}}^{\ast}(w_{1,i}% )u_{i}+u_{g_{n}}^{\ast}(w_{i})\eta_{i}\right\} . \label{P-AT-E1-40}% \end{align} To show the asymptotic normality of the above partial sum, we apply the Lindeberg-Feller CLT. By the Cauchy-Schwarz inequality, Assumption \ref{AA-E-5} and Lemma \ref{AL-E-2}.(h), \begin{align} \frac{\sup_{w_{1}\in\mathcal{W}_{1}}\left\vert v_{\Gamma_{n}}^{\ast}% (w_{1})\right\vert ^{2}}{\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2}} & \leq\zeta_{L}^{2}\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-2}\left\Vert Q_{L}^{-1}HQ_{K}^{-1}\rho(P_{K})\right\Vert ^{2}\nonumber\\ & \leq\omega_{\min}^{-1}(Q_{L})\zeta_{L}^{2}\left\Vert v_{n}^{\ast }\right\Vert _{sd}^{-2}\rho(P_{K})^{\prime}Q_{K}^{-1}H^{\prime}Q_{L}% ^{-1}HQ_{K}^{-1}\rho(P_{K})\nonumber\\ & =\frac{C\zeta_{L}^{2}}{\omega_{\min}(Q_{L})}\frac{\mathbb{E}\left[ (v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}\right] }{\left\Vert v_{g_{n}}^{\ast }\right\Vert _{2}^{2}}=O(\zeta_{L}^{2}), \label{P-AT-E1-41}% \end{align} where the first equality is by the definition of $v_{\Gamma_{n}}^{\ast}$. 
By Assumption \ref{AA-E-5}, (\ref{P-AT-E1-41}), Lemma \ref{AL-E-2}% .(f)-\ref{AL-E-2}.(h), \begin{align} & \frac{\mathbb{E}\left[ (v_{\Gamma_{n}}^{\ast}(w_{1})u+v_{g_{n}}^{\ast }(w)\eta)^{4}\right] }{n\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{4}% }\nonumber\\ & \leq8\frac{\mathbb{E}\left[ (v_{\Gamma_{n}}^{\ast}(w_{1})u)^{4}\right] +\mathbb{E}\left[ (v_{g_{n}}^{\ast}(w)\eta)^{4}\right] }{n\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{4}}\nonumber\\ & \leq8C\frac{\mathbb{E}\left[ (v_{\Gamma_{n}}^{\ast}(w_{1}))^{4}\right] +\mathbb{E}\left[ (v_{g_{n}}^{\ast}(w))^{4}\right] }{n\left\Vert v_{n}% ^{\ast}\right\Vert _{sd}^{4}}\nonumber\\ & \leq8C\frac{\sup_{w_{1}}(v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}+\sup _{w}(v_{g_{n}}^{\ast}(w))^{2}}{n\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2}% }\frac{\mathbb{E}\left[ (v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}\right] +\mathbb{E}\left[ (v_{g_{n}}^{\ast}(w))^{2}\right] }{\left\Vert v_{n}^{\ast }\right\Vert _{sd}^{2}}\nonumber\\ & =O((\zeta_{L}^{2}+\xi_{0,K}^{2})n^{-1})=o(1) \label{P-AT-E1-42}% \end{align} where the equality is by Assumption \ref{AA-E-7}. This verifies Lindeberg's condition. Hence Assumption 3.3.(ii) in HLR follows by the i.i.d. assumption and the Lindeberg-Feller CLT.{} Finally, we verify Assumption 3.3.(iii) in HLR. First, we have $\varepsilon_{2,n}=0$ because the estimators in both the first step and the second step have closed form expressions. By definition, $\delta_{2,n}^{\ast}=K^{1/2}n^{-1/2}+K^{-s/d}+L^{1/2}% n^{-1/2}+L^{-s_{1}/d_{w_{1}}}$ which together with $K\rightarrow\infty$\ and $L\rightarrow\infty$ implies that $n^{-1/2}(\delta_{2,n}^{\ast})^{-1}=o(1)$. Moreover by Lemma \ref{AL-E-2}.(e), $||u_{g_{n}}^{\ast}||_{\psi}% =(\mathbb{E}\left[ (v_{g_{n}}^{\ast}(w))^{2}\right] )^{1/2}\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-1}=O(1)$ which finishes verification of Assumption 3.3.(iii) in HLR. 
\end{proof} \bigskip\ \begin{corollary} \label{C-AL-E-1} Under Assumptions \ref{AA-E-1}-\ref{AA-E-7}, Assumptions \ref{L-SA-4}-\ref{L-SA-6} hold. \end{corollary} \begin{proof} [Proof of Corollary \ref{C-AL-E-1}]We first verify Assumption \ref{L-SA-4}. By definition, \begin{equation} \sup_{z_{1}\in\mathcal{Z}_{1},h\in\mathcal{N}_{h,n}}\left[ \left\vert \tau (z_{1},h)\right\vert +\left\vert \tau(z_{1},h_{o})\right\vert \right] \leq2 \label{P-CAL-E1-1}% \end{equation} which shows that Assumption \ref{L-SA-4}.(i) holds. By definition, $\psi ^{\ast}(z_{2},\alpha)=-\frac{1}{2}(y-g(w_{h}))^{2}$, which implies that \begin{equation} \Delta_{\psi}^{\ast}(z_{2},\alpha)[v_{g,1}]=(y-g(w_{h}))v_{g,1} \label{P-CAL-E1-2}% \end{equation} and \begin{equation} r_{\psi,g}^{\ast}(z_{2},\alpha)[v_{g,1},v_{g,2}]=-v_{g,1}v_{g,2} \label{P-CAL-E1-3}% \end{equation} for any $v_{g,1}$, $v_{g,2}\in\mathcal{V}_{2}$, which implies that Assumption \ref{L-SA-1}.(i) holds. Moreover% \begin{align} & \psi(z_{2},g^{\ast},h)-\psi(z_{2},\alpha)-\Delta_{\psi}(z_{2},\alpha )[\pm\kappa_{n}u_{g_{n}}^{\ast}]-\kappa_{n}^{2}r_{\psi,g}(z_{2},\alpha )[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]\nonumber\\ & =\tau(z_{1},h)\left[ \begin{array} [c]{c}% \psi^{\ast}(z_{2},g^{\ast},h)-\psi^{\ast}(z_{2},\alpha)\\ -\Delta_{\psi}^{\ast}(z_{2},\alpha)[\pm\kappa_{n}u_{g_{n}}^{\ast}]-\kappa _{n}^{2}r_{\psi,g}^{\ast}(z_{2},\alpha)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}] \end{array} \right] =0, \label{P-CAL-E1-5}% \end{align} for any $\alpha\in\mathcal{N}_{n}$ and any $z_{2}\in\mathcal{Z}_{2}$. This means that Assumption \ref{L-SA-1}.(ii) holds for $\psi(z_{2},\alpha)$ with $\Lambda_{1,n}(z_{2})=0$.\ By definition,% \begin{align} \Gamma(\alpha_{o})\left[ h-h_{o},u_{g_{n}}^{\ast}\right] & =\mathbb{E}% \left[ \tau(w)\partial_{u}g_{o}(w)(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast }(w)\right] \nonumber\\ & =\mathbb{E}\left[ \tau(w)r_{\psi,h}^{\ast}(Z_{2},\alpha_{o})[h_{o,n}% -h_{o},u_{g_{n}}^{\ast}]\right] . 
\label{P-CAL-E1-5a}% \end{align} Therefore, Assumption \ref{L-SA-1}.(v) has been verified in (\ref{P-AT-E1-26a}% ) above. By (\ref{A_NPV_3}) and (\ref{P-CAL-E1-2}), \begin{equation} \mathbb{E}\left[ \left. \Delta_{\psi}^{\ast}(Z_{2},g_{o},h_{o})[u_{g_{n}% }^{\ast}]\right\vert Z_{1}\right] =\mathbb{E}\left[ \left. \eta u_{g_{n}% }^{\ast}(w)\right\vert x,w_{1}\right] =u_{g_{n}}^{\ast}(w)\mathbb{E}\left[ \left. \eta\right\vert x,w_{1}\right] =0 \label{P-CAL-E1-6}% \end{equation} which verifies Assumption \ref{L-SA-4}.(iii). By definition,% \begin{equation} r_{\psi,h}^{\ast}(z_{2},\alpha)[v_{g},v_{h}]=\partial_{u}g(w_{h})v_{g}v_{h} \label{P-CAL-E1-7}% \end{equation} for any $z_{2}\in\mathcal{Z}_{2}$, any $\alpha\in\mathcal{N}_{\alpha}$, any $v_{h}\in\mathcal{V}_{1}$ and any $v_{g}\in\mathcal{V}_{2}$, which implies that Assumption \ref{L-SA-4}.(iv) holds. By the triangle inequality, (\ref{AA-AP-2}) and (\ref{AL-E6-2}) in the proof of Lemma \ref{AL-E-6}, for any $\alpha\in\mathcal{N}_{n}$,% \begin{align} & \left\vert \Delta_{\psi}(z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}% (z_{2},g_{o},h)[u_{g_{n}}^{\ast}]-r_{\psi,g}(z_{2},g_{o},h)[g-g_{o},u_{g_{n}% }^{\ast}]\right\vert \nonumber\\ & =\left\vert \tau(w_{h})u_{g_{n}}^{\ast}(w)\left( (y-g(w_{h}))-(y-g_{o}% (w_{h}))+(g(w)-g_{o}(w))\right) \right\vert \nonumber\\ & =\left\vert \tau(w_{h})u_{g_{n}}^{\ast}(w)\left( (g_{o}(w_{h}% )-g(w_{h}))+(g(w)-g_{o}(w))\right) \right\vert \nonumber\\ & \leq\left\vert \tau(w_{h})u_{g_{n}}^{\ast}(w)\left( (g_{o,K}% (w_{h})-g(w_{h}))-(g_{o,K}(w)-g(w))\right) \right\vert \nonumber\\ & +\left\vert \tau(w_{h})u_{g_{n}}^{\ast}(w)\left( (g_{o,K}(w_{h}% )-g_{o}(w_{h}))-(g_{o,K}(w)-g_{o}(w))\right) \right\vert \nonumber\\ & \leq\xi_{1,K}\left\Vert \beta-\beta_{o,K}\right\Vert \left\vert (h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}\right\vert +CK^{-s/d}|u_{g_{n}}^{\ast }|\nonumber\\ & \leq\xi_{1,K}\delta_{g,n}\left\vert (h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast }\right\vert +CK^{-s/d}|u_{g_{n}}^{\ast}|. 
\label{P-CAL-E1-8}% \end{align} Let $\Lambda_{3,n}(z_{2},\alpha)=\xi_{1,K}\delta_{g,n}\left\vert (h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}\right\vert +CK^{-s/d}|u_{g_{n}}^{\ast }|$. By Lemma \ref{AL-E-2}.(a), \ref{AL-E-2}.(f) and Assumption \ref{AA-E-7}, \begin{equation} \sup_{h\in\mathcal{N}_{h,n}}\sum_{i=1}^{n}\Lambda_{3,n}(Z_{2,i},\alpha )=o_{p}(n^{-1/2}). \label{P-CAL-E1-9}% \end{equation} Similarly, by Lemma \ref{AL-E-2}.(b), \ref{AL-E-2}.(e) and Assumption \ref{AA-E-7},% \begin{equation} \sup_{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ \Lambda_{3,n}(Z_{2}% ,\alpha)\right] =o(n^{-1/2}). \label{P-CAL-E1-10}% \end{equation} This verifies Assumption \ref{L-SA-4}.(v). By definition, \begin{equation} \mathbb{E}\left[ \left. (\Delta_{\psi}^{\ast}(Z_{2},\alpha_{o})[u_{g_{n}% }^{\ast}])^{2}\right\vert Z_{1}=z_{1}\right] =(u_{g_{n}}^{\ast}% (w))^{2}\mathbb{E}\left[ \left. \eta^{2}\right\vert Z_{1}=z_{1}\right] \leq C\xi_{0,K}^{2} \label{P-CAL-E1-11}% \end{equation} where the inequality is by Assumption \ref{AA-E-5} and Lemma \ref{AL-E-2}.(g). By Lemma \ref{AL-E-2}.(d), Assumption \ref{L-SA-4}.(vii) holds with $\delta_{\tau,n}^{\ast}=\zeta_{L}\delta_{h,n}$, and $\delta_{\tau,n}^{\ast}% \xi_{0,K}^{2}=o(1)$ follows by Assumption \ref{AA-E-7}. We next verify Assumption \ref{L-SA-5}. By (\ref{P-CAL-E1-3}), Assumption \ref{L-SA-5}.(i) holds with $\Lambda_{6,n}(z_{2},\alpha)=0$ for any $z_{2}% \in\mathcal{Z}_{2}$ and any $\alpha\in\mathcal{N}_{n}$. 
This also means that Assumptions \ref{L-SA-5}.(vii)-(viii) also hold for $\Lambda_{6,n}% (z_{2},\alpha)$.\ By Assumption \ref{AA-E-3}, (\ref{P-CAL-E1-2}) and (\ref{P-CAL-E1-7}),% \begin{align} & \left\vert \Delta_{\psi}^{\ast}(z_{2},g_{o},h)[u_{g_{n}}^{\ast}% ]-\Delta_{\psi}^{\ast}(z_{2},\alpha_{o})[u_{g_{n}}^{\ast}]-r_{\psi,h}^{\ast }(z_{2},\alpha_{o})[h-h_{o},u_{g_{n}}^{\ast}]\right\vert \nonumber\\ & =\left\vert u_{g_{n}}^{\ast}(w)\left( (y-g_{o}(w_{h}))-(y-g_{o}% (w))+\partial_{u}g_{o}(w)(h(w)-h_{o}(w))\right) \right\vert \nonumber\\ & =\left\vert u_{g_{n}}^{\ast}(w)\left( (g_{o}(w)-g_{o}(w_{h}))-\partial _{u}g_{o}(w)(h_{o}(w)-h(w))\right) \right\vert \nonumber\\ & \leq C\left\vert u_{g_{n}}^{\ast}(w)(h(w)-h_{o}(w))^{2}\right\vert . \label{P-CAL-E1-12}% \end{align} Let $\Lambda_{7,n}(z_{2},\alpha)=C\left\vert u_{g_{n}}^{\ast}(w)(h(w)-h_{o}% (w))^{2}\right\vert $. Then by Lemma \ref{AL-E-2}.(a) and \ref{AL-E-2}.(g), and Assumption \ref{AA-E-7}, \begin{equation} \sup_{h\in\mathcal{N}_{h,n}}\sum_{i=1}^{n}\Lambda_{7,n}(Z_{2,i},\alpha )=o_{p}(n^{-1/2}). \label{P-CAL-E1-13}% \end{equation} Similarly, by Lemma \ref{AL-E-2}.(b) and \ref{AL-E-2}.(g), and Assumption \ref{AA-E-7},% \begin{equation} \sup_{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ \Lambda_{7,n}(Z_{2}% ,\alpha)\right] =o_{p}(n^{-1/2}). \label{P-CAL-E1-14}% \end{equation} This shows that Assumptions \ref{L-SA-5}.(ii) and \ref{L-SA-5}.(vii)-(viii) hold. For any $h\in\mathcal{N}_{h,n}$, \begin{align} & \left\vert (\tau(Z_{1},h)-\tau(Z_{1},h_{o}))r_{\psi,h}^{\ast}(Z_{2}% ,\alpha_{o})[h-h_{o},u_{g_{n}}^{\ast}]\right\vert \nonumber\\ & =\left\vert (\tau(w_{h})-\tau(w))\partial_{u}g_{o}(w)(h(w_{1})-h_{o}% (w_{1}))u_{g_{n}}^{\ast}\right\vert \nonumber\\ & \leq C\zeta_{L}\xi_{0,K}\delta_{h,n}\left\vert \tau(w_{h})-\tau (w)\right\vert \label{P-CAL-E1-15}% \end{align} where the inequality is by Assumption \ref{AA-E-5}, (\ref{PAL-E2-02}) in the proof of Lemma \ref{AL-E-2}\ and Lemma \ref{AL-E-2}.(g). 
By (\ref{P-CAL-E1-15}% ),\ Lemma \ref{AL-E-2}.(c) and Assumption \ref{AA-E-7}, \begin{equation} \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mathbb{E}\left[ (\tau(Z_{1}% ,h)-\tau(Z_{1},h_{o}))r_{\psi,h}^{\ast}(Z_{2},\alpha_{o})[h-h_{o},u_{g_{n}% }^{\ast}]\right] \right\vert \leq C\zeta_{L}^{2}\xi_{0,K}\delta_{h,n}% ^{2}=o(n^{-1/2}), \label{P-CAL-E1-16}% \end{equation} which verifies Assumption \ref{L-SA-5}.(iii). By (\ref{AA-AP-2}) and Lemma \ref{AL-E-1}.(d), for any $g\in\mathcal{N}_{g,n}$ \begin{align} & \sup_{w}\left\vert (g(w)-g_{o}(w))u_{g_{n}}^{\ast}(w)\right\vert \nonumber\\ & =\sup_{w}\left\vert \tau(w)(g(w)-g_{o}(w))u_{g_{n}}^{\ast}(w)\right\vert \nonumber\\ & \leq\sup_{w\in\mathcal{T}_{w}}\left\vert \tau(w)(g(w)-g_{o,K}(w))u_{g_{n}% }^{\ast}(w)\right\vert +\sup_{w\in\mathcal{T}_{w}}\left\vert \tau (w)(g_{o,K}(w)-g_{o}(w))u_{g_{n}}^{\ast}(w)\right\vert \nonumber\\ & \leq\sup_{w}\left\vert u_{g_{n}}^{\ast}(w)\right\vert \sup_{w\in \mathcal{T}_{w}}\left\vert g(w)-g_{o,K}(w)\right\vert \nonumber\\ & +\sup_{w}\left\vert u_{g_{n}}^{\ast}(w)\right\vert \sup_{w\in \mathcal{T}_{w}}\left\vert g_{o,K}(w)-g_{o}(w)\right\vert \nonumber\\ & \leq C\xi_{0,K}(\left\Vert \beta-\beta_{o,K}\right\Vert \xi_{0,K}% +K^{-s/d})\leq C\xi_{0,K}^{2}\delta_{g,n} \label{P-CAL-E1-17}% \end{align} where the first equality is by $\tau(w)^{2}=\tau(w)$, the first inequality is by the triangle inequality, the third inequality is by (\ref{AA-AP-2}) and Lemma \ref{AL-E-2}.(g), and the last inequality is by the definition of $\mathcal{N}_{g,n}$. 
For any $\alpha\in\mathcal{N}_{n}$, \begin{align} & \left\vert (\tau(Z_{1},h)-\tau(Z_{1},h_{o}))r_{\psi,g}^{\ast}(Z_{2}% ,\alpha_{o})[g-g_{o},u_{g_{n}}^{\ast}]\right\vert \nonumber\\ & =\left\vert (\tau(w_{h})-\tau(w))(g(w)-g_{o}(w))u_{g_{n}}^{\ast}\right\vert \nonumber\\ & \leq C\xi_{0,K}^{2}\delta_{g,n}\left\vert \tau(Z_{1},h)-\tau(Z_{1}% ,h_{o})\right\vert \label{P-CAL-E1-18}% \end{align} where the inequality is by (\ref{P-CAL-E1-17}) and the definition of $\mathcal{N}_{n}$. By (\ref{P-CAL-E1-18}),\ Lemma \ref{AL-E-2}.(c) and Assumption \ref{AA-E-7},% \begin{equation} \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mathbb{E}\left[ (\tau(Z_{1}% ,h)-\tau(Z_{1},h_{o}))r_{\psi,g}^{\ast}(Z_{2},\alpha_{o})[g-g_{o},u_{g_{n}% }^{\ast}]\right] \right\vert \leq C\zeta_{L}\xi_{0,K}^{2}\delta_{h,n}% \delta_{g,n}=o(n^{-1/2}), \label{P-CAL-E1-19}% \end{equation} which verifies Assumption \ref{L-SA-5}.(iv). By (\ref{P-CAL-E1-3}), Assumption \ref{L-SA-5}.(v) holds with $\Lambda_{8,n}(z_{2},\alpha)=0$ for any $z_{2}% \in\mathcal{Z}_{2}$ and any $\alpha\in\mathcal{N}_{n}$. By\ (\ref{P-CAL-E1-3}) and Lemma \ref{AL-E-2}.(e), Assumption \ref{L-SA-5}.(vi) also holds. Assumptions \ref{L-SA-5}.(vii) and \ref{L-SA-5}.(viii) have been verified together with Assumptions \ref{L-SA-5}.(i) and \ref{L-SA-5}.(ii). Finally, we verify Assumption \ref{L-SA-6}. Let $h_{o,n}=h_{o,L}$. 
By (\ref{P-CAL-E1-7}), (\ref{AA-AP-1}), Assumption \ref{AA-E-7} and Lemma \ref{AL-E-2}.(e) \begin{align} & \mathbb{E}\left[ \left\vert r_{\psi,h}^{\ast}(Z_{2},\alpha_{o}% )[h_{o,L}-h_{o},u_{g_{n}}^{\ast}]\right\vert \right] \nonumber\\ & =\mathbb{E}\left[ \left\vert \partial_{u}g_{o}(w)(h_{o,L}-h_{o})u_{g_{n}% }^{\ast}\right\vert \right] \nonumber\\ & \leq C(\mathbb{E}\left[ \left\vert (h_{o,L}-h_{o})^{2}\right\vert \right] )^{1/2}(\mathbb{E}\left[ \left\vert (u_{g_{n}}^{\ast})^{2}\right\vert \right] )^{1/2}\nonumber\\ & \leq CL^{-s_{1}/d_{w_{1}}}=o(n^{-1/2}) \label{P-CAL-E1-20}% \end{align} which verifies Assumption \ref{L-SA-6}.(i). Assumption \ref{L-SA-6}.(ii) can be verified using the same arguments of the proof of Lemma \ref{AL-E-3}. Let $g_{o,n}=g_{o,K}$. By (\ref{P-CAL-E1-3}), (\ref{AA-AP-2}), Assumption \ref{AA-E-7} and Lemma \ref{AL-E-2}.(e) \begin{align} & \mathbb{E}\left[ \left\vert r_{\psi,g}^{\ast}(Z_{2},\alpha_{o}% )[g_{o,K}-g_{o},u_{g_{n}}^{\ast}]\right\vert \right] \nonumber\\ & =\mathbb{E}\left[ \left\vert (g_{o,K}-g_{o})u_{g_{n}}^{\ast}\right\vert \right] \nonumber\\ & \leq C(\mathbb{E}\left[ \left\vert (g_{o,K}-g_{o})^{2}\right\vert \right] )^{1/2}(\mathbb{E}\left[ \left\vert (u_{g_{n}}^{\ast})^{2}\right\vert \right] )^{1/2}\nonumber\\ & \leq CK^{-s/d}=o(n^{-1/2}) \label{P-CAL-E1-21}% \end{align} which verifies Assumption \ref{L-SA-6}.(iii). 
For any $h\in\mathcal{N}_{h,n}% $, \begin{align} & \mathbb{E}\left[ \tau(Z_{1},h)^{2}(r_{\psi,h}^{\ast}(z_{2},\alpha _{o})[h-h_{o,L},u_{g_{n}}^{\ast}])^{2}\right] \nonumber\\ & =\mathbb{E}\left[ \tau(w_{h})^{2}(\partial_{u}g_{o}(w)(h-h_{o,L})u_{g_{n}% }^{\ast})^{2}\right] \nonumber\\ & \leq C\sup_{w_{1}\in\mathcal{W}_{1}}\left\vert (h(w_{1})-h_{o,L}% (w_{1}))^{2}\right\vert \mathbb{E}\left[ (u_{g_{n}}^{\ast})^{2}\right] \leq C\zeta_{L}^{2}\delta_{h,n}^{2} \label{P-CAL-E1-22}% \end{align} where the first inequality is by $\tau(w_{h})^{2}<1$ for any\ $h\in \mathcal{N}_{h,n}$ and Assumption \ref{AA-E-5}, the last inequality is by (\ref{PAL-E2-02}) and Lemma \ref{AL-E-2}.(e). Moreover, for any $f\in \mathcal{F}_{3,n}^{\ast}$, \begin{equation} \sup_{z_{2}\in\mathcal{Z}_{2}}\left\vert f(z_{2})\right\vert \leq\left( \sup_{h\in\mathcal{N}_{h,n}}\sup_{w_{1}\in\mathcal{W}_{1}}\left\vert (h(w_{1})-h_{o,L}(w_{1}))^{2}\right\vert \right) \left( \sup_{w\in \mathcal{W}}\left\vert (u_{g_{n}}^{\ast}(w))^{2}\right\vert \right) \leq C\xi_{0,K}^{2}\zeta_{L}^{2}\delta_{h,n}^{2}, \label{P-CAL-E1-23}% \end{equation} which together with (\ref{P-CAL-E1-22}) and Assumption \ref{AA-E-7} implies that \begin{align} & (\sup_{f\in\mathcal{F}_{3,n}^{\ast}}\mathbb{E}\left[ f^{2}\right] +(K+L)\sup_{z_{2}\in\mathcal{Z}_{2}}|F_{3,n}^{\ast}(z_{2})|\log(n)n^{-1}% )(K+L)\log(n)\nonumber\\ & \leq C(\zeta_{L}^{2}\delta_{h,n}^{2}(K+L)+(K+L)^{2}\xi_{0,K}^{2}\zeta _{L}^{2}\delta_{h,n}^{2}\log(n)n^{-1})\log(n)=o(1). \label{P-CAL-E1-24}% \end{align} This verifies Assumption \ref{L-SA-6}.(v) for $\mathcal{F}_{3,n}^{\ast}$. For any $h\in\mathcal{N}_{h,n}\ $and $g\in\mathcal{N}_{g,n}$, \begin{equation} \tau(z_{1},h)r_{\psi,g}^{\ast}(z_{2},\alpha_{o})[g-g_{o,K},u_{g_{n}}^{\ast }]=\tau(w_{h})u_{g_{n}}^{\ast}(w)P(w)^{\prime}(\beta-\beta_{o,K}). \label{P-CAL-E1-25}% \end{equation} Hence Assumption \ref{L-SA-6}.(iv) can be verified using the same arguments of Lemma \ref{AL-E-4}. 
For any $h\in\mathcal{N}_{h,n}\ $and $g\in\mathcal{N}% _{g,n}$, \begin{align} & \mathbb{E}\left[ \tau(Z_{1},h)^{2}(r_{\psi,g}^{\ast}(z_{2},\alpha _{o})[g-g_{o,K},u_{g_{n}}^{\ast}])^{2}\right] \nonumber\\ & =\mathbb{E}\left[ \tau(w_{h})^{2}((g-g_{o,K})u_{g_{n}}^{\ast})^{2}\right] \nonumber\\ & \leq C\sup_{w\in\mathcal{W}}\left\vert \tau(w)(g(w)-g_{o,K}% (w))^{2}\right\vert \mathbb{E}\left[ (u_{g_{n}}^{\ast})^{2}\right] \leq C\xi_{0,K}^{2}\delta_{g,n}^{2} \label{P-CAL-E1-26}% \end{align} where the first inequality is by $\tau(w)^{2}=\tau(w)$ and $\tau(w_{h})^{2}<1$ for any $w$ and any $h\in\mathcal{N}_{h,n}$, the second inequality is by the definition of $\mathcal{N}_{g,n}$ and Lemma \ref{AL-E-2}.(e). Moreover, for any $f\in\mathcal{F}_{4,n}^{\ast}$,% \begin{equation} \sup_{z_{2}\in\mathcal{Z}_{2}}\left\vert f(z_{2})\right\vert \leq\left( \sup_{g\in\mathcal{N}_{g,n}}\sup_{w\in\mathcal{T}_{w}}\left\vert (g(w)-g_{o,K}(w))^{2}\right\vert \right) \left( \sup_{w\in\mathcal{W}% }\left\vert (u_{g_{n}}^{\ast}(w))^{2}\right\vert \right) \leq C\xi_{0,K}% ^{4}\delta_{g,n}^{2}, \label{P-CAL-E1-27}% \end{equation} which together with (\ref{P-CAL-E1-26}) and Assumption \ref{AA-E-7} implies that \begin{align} & (\sup_{f\in\mathcal{F}_{4,n}^{\ast}}\mathbb{E}\left[ f^{2}\right] +(K+L)\sup_{z_{2}\in\mathcal{Z}_{2}}|F_{4,n}^{\ast}(z_{2})|\log(n)n^{-1}% )(K+L)\log(n)\nonumber\\ & \leq C(\xi_{0,K}^{2}\delta_{g,n}^{2}(K+L)+(K+L)^{2}\xi_{0,K}^{4}% \delta_{g,n}^{2}\log(n)n^{-1})\log(n)=o(1). \label{P-CAL-E1-28}% \end{align} This verifies Assumption \ref{L-SA-6}.(v) for $\mathcal{F}_{4,n}^{\ast}$. 
\end{proof} \bigskip \begin{lemma} \label{AL-E-0} Let $v_{\Gamma}^{\ast}(w_{1})=\mathbb{E}\left[ \tau (w)v_{g}^{\ast}(w)\partial_{u}g_{o}(w)|w_{1}\right] $.\ Under Assumptions \ref{AA-E-5} and \ref{AA-E-6}, we have (a) $\mathbb{E}[\tau(w)|v_{g_{n}}^{\ast}(w)-v_{g}^{\ast}(w)|^{2}]\rightarrow0$ as $K\rightarrow\infty$; (b) $\mathbb{E}\left[ \eta^{2}\tau(w)(v_{g_{n}}^{\ast}(w))^{2}\right] \rightarrow\mathbb{E}\left[ \eta^{2}\tau(w)(v_{g}^{\ast}(w))^{2}\right] $ as $K\rightarrow\infty$; (c)\ $\mathbb{E}\left[ |v_{\Gamma_{n}}^{\ast}(w_{1})-v_{\Gamma}^{\ast}% (w_{1})|^{2}\right] \rightarrow0$ as $K\rightarrow\infty$ and $L\rightarrow \infty$; (d) $\mathbb{E}\left[ u^{2}(v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}\right] \rightarrow\mathbb{E}\left[ u^{2}(v_{\Gamma}^{\ast}(w_{1}))^{2}\right] $ as $K\rightarrow\infty$ and $L\rightarrow\infty$. \end{lemma} \begin{proof} [Proof of Lemma \ref{AL-E-0}](a) By the definition of $v_{g_{n}}^{\ast}$ and Assumption \ref{AA-E-6},% \begin{equation} \mathbb{E}[\tau(w)P(w)(v_{g_{n}}^{\ast}(w)-v_{g}^{\ast}(w))]=\mathbf{0}% _{K\times1} \label{PAA-E-L1-1}% \end{equation} which immediately implies that \begin{align} & \mathbb{E}[\tau(w)|P(w)^{\prime}\beta_{g,K}-v_{g}^{\ast}(w)|^{2}% ]\nonumber\\ & =\mathbb{E}[\tau(w)|P(w)^{\prime}\beta_{g,K}-v_{g_{n}}^{\ast}% (w)|^{2}]+\mathbb{E}[\tau(w)|v_{g_{n}}^{\ast}(w)-v_{g}^{\ast}(w)|^{2}% ]\nonumber\\ & \geq\mathbb{E}[\tau(w)|v_{g_{n}}^{\ast}(w)-v_{g}^{\ast}(w)|^{2}] \label{PAA-E-L1-2}% \end{align} for any $\beta_{g,K}\in\mathbb{R}^{K}$. Hence as $K\rightarrow\infty$, \begin{equation} \mathbb{E}[\tau(w)|v_{g_{n}}^{\ast}(w)-v_{g}^{\ast}(w)|^{2}]\leq \mathbb{E}[\tau(w)|P(w)^{\prime}\beta_{v,K}-v_{g}^{\ast}(w)|^{2}]\rightarrow0, \label{PAA-E-L1-3}% \end{equation} where $\beta_{v,K}$ is defined in Assumption \ref{AA-E-6}. 
(b) By Assumption \ref{AA-E-5}, Jensen's inequality and H\"{o}lder's inequality,% \begin{align} & \left\vert \mathbb{E}\left[ \eta^{2}\tau(w)(v_{g_{n}}^{\ast}% (w)-v_{g}^{\ast}(w))v_{g}^{\ast}(w)\right] \right\vert \nonumber\\ & \leq C\mathbb{E}\left[ \tau(w)\left\vert (v_{g_{n}}^{\ast}(w)-v_{g}^{\ast }(w))v_{g}^{\ast}(w)\right\vert \right] \nonumber\\ & \leq C(\mathbb{E}\left[ \tau(w)\left\vert (v_{g_{n}}^{\ast}(w)-v_{g}% ^{\ast}(w))^{2}\right\vert \right] \mathbb{E}\left[ \tau(w)(v_{g}^{\ast }(w))^{2}\right] )^{1/2} \label{PAA-E-L1-4}% \end{align} which together with Assumption \ref{AA-E-6} and the result proved in (a) implies that% \begin{equation} \left\vert \mathbb{E}\left[ \eta^{2}\tau(w)(v_{g_{n}}^{\ast}(w)-v_{g}^{\ast }(w))v_{g}^{\ast}(w)\right] \right\vert \rightarrow0\text{ as }% K\rightarrow\infty. \label{PAA-E-L1-5}% \end{equation} By the triangle inequality, \begin{align} & \left\vert \mathbb{E}\left[ \eta^{2}\tau(w)(v_{g_{n}}^{\ast}% (w))^{2}\right] -\mathbb{E}\left[ \eta^{2}\tau(w)(v_{g}^{\ast}% (w))^{2}\right] \right\vert \nonumber\\ & \leq\mathbb{E}\left[ \eta^{2}\tau(w)(v_{g_{n}}^{\ast}(w)-v_{g}^{\ast }(w))^{2}\right] \nonumber\\ & +2\left\vert \mathbb{E}\left[ \eta^{2}\tau(w)(v_{g_{n}}^{\ast}% (w)-v_{g}^{\ast}(w))v_{g}^{\ast}(w)\right] \right\vert , \label{PAA-E-L1-6}% \end{align} which combined with the results in (\ref{PAA-E-L1-3}), (\ref{PAA-E-L1-4}) and (\ref{PAA-E-L1-5}) proves the claim (b). (c) Let $v_{\Gamma,L}^{\ast}(w_{1})=R(\cdot)^{\prime}Q_{L}^{-1}\mathbb{E}% \left[ R(w_{1})\tau(w)\partial_{u}g_{o}(w)v_{g}^{\ast}(w)\right] $. Then% \begin{equation} v_{\Gamma_{n}}^{\ast}(w_{1})-v_{\Gamma,L}^{\ast}(w_{1})=R(\cdot)^{\prime}% Q_{L}^{-1}\mathbb{E}\left[ R(w_{1})\tau(w)\partial_{u}g_{o}(w)(v_{g_{n}% }^{\ast}(w)-v_{g}^{\ast}(w))\right] . 
\label{PAA-E-L1-7}% \end{equation} By the (matrix) Cauchy-Schwarz inequality, Assumption \ref{AA-E-5} and the result proved in (a), \begin{align} \mathbb{E}\left[ |v_{\Gamma_{n}}^{\ast}(w_{1})-v_{\Gamma,L}^{\ast}% (w_{1})|^{2}\right] & \leq\mathbb{E}\left[ \tau(w)(\partial_{u}% g_{o}(w))^{2}(v_{g_{n}}^{\ast}(w)-v_{g}^{\ast}(w))^{2}\right] \nonumber\\ & \leq C\mathbb{E}\left[ \tau(w)(v_{g_{n}}^{\ast}(w)-v_{g}^{\ast}% (w))^{2}\right] \rightarrow0\text{ } \label{PAA-E-L1-8}% \end{align} as $K\rightarrow\infty$. Using the same arguments after display (A.9) of Newey, Powell and Vella (1999) (their $b_{L}(z)$ and $\rho(z)$ are $v_{\Gamma,L}^{\ast}(w_{1})$ and $v_{\Gamma}^{\ast}(w_{1})$ here respectively), we can show that \begin{equation} \mathbb{E}\left[ |v_{\Gamma,L}^{\ast}(w_{1})-v_{\Gamma}^{\ast}(w_{1}% )|^{2}\right] \rightarrow0\text{ as }L\rightarrow\infty\text{.} \label{PAA-E-L1-9}% \end{equation} Combining the results in (\ref{PAA-E-L1-8}) and (\ref{PAA-E-L1-9}), we immediately prove the claim in (c). (d) The proof follows similar arguments in the proof of claim (b) and hence is omitted. \end{proof} Let $\widehat{Q}_{n,L}=n^{-1}R_{n}R_{n}^{\prime}$ and $\widehat{Q}% _{n,K}=n^{-1}\widehat{P}_{n}^{\prime}\widehat{P}_{n}$, which are the estimators of $Q_{L}=\mathbb{E}\left[ R(w_{1})R(w_{1})^{\prime}\right] $ and $Q_{K}=\mathbb{E}\left[ \tau(w)P(w)P(w)^{\prime}\right] $ respectively. The following Lemma is useful to verify the high-level conditions for the asymptotic normality. The proof of the results in Lemmas \ref{AL-E-1}.(a) and \ref{AL-E-1}.(b) are in Newey (1997) and the proof of the remaining results are in\ Newey, Powell and Vella (1999). \begin{lemma} \label{AL-E-1} Let $\delta_{h,n}^{\ast}=L^{1/2}n^{-1/2}+L^{-s_{1}/d_{w_{1}}}$ and $\delta_{g,n}^{\ast}=K^{1/2}n^{-1/2}+K^{-s/d}+\delta_{h,n}^{\ast}$. 
Under Assumptions \ref{AA-E-1}-\ref{AA-E-4}, we have (a)\ $||\widehat{Q}_{n,L}-Q_{L}||=O_{p}(\zeta_{L}L^{1/2}n^{-1/2})$; (b) $||\widehat{\gamma}_{n}-\gamma_{o,L}||=O_{p}(\delta_{h,n}^{\ast})$; (c) $||\widehat{Q}_{n,K}-Q_{K}||=O_{p}(\xi_{1,K}^{2}(\delta_{h,n}^{\ast}% )^{2}+K^{1/2}\xi_{1,K}\delta_{h,n}^{\ast}+\xi_{0,K}^{2}\zeta_{L}\delta _{h,n}^{\ast})$; (d)\ $||\widehat{\beta}_{n}-\beta_{o,K}||=O_{p}(\delta_{g,n}^{\ast})$; (e) $n^{-1}\sum\nolimits_{i=1}^{n}\left\vert \widehat{\tau}_{i}-\tau _{i}\right\vert =O_{p}(\zeta_{L}\delta_{h,n}^{\ast})$. \end{lemma} Recall that $\mathcal{N}_{\gamma,n}=\{ \gamma\in\mathbb{R}^{L}$: $||\gamma-\gamma_{o,L}||\leq\delta_{h,n}\}$ and $\mathcal{N}_{\beta,n}=\{ \beta\in\mathbb{R}^{K}$: $||\beta-\beta_{o,K}||\leq\delta_{g,n}\}$ where $\delta_{h,n}=\delta_{h,n}^{\ast}\varrho_{n}$, $\delta_{g,n}=\delta _{g,n}^{\ast}\varrho_{n}$ and $\{ \varrho_{n}\}_{n}$ is a slowly divergent real positive sequence.\ By Lemma \ref{AL-E-1}.(b) and Lemma \ref{AL-E-1}.(d), we have $\widehat{\gamma}_{n}\in\mathcal{N}_{\gamma,n}$ and $\widehat{\beta }_{n}\in\mathcal{N}_{\beta,n}$ wpa1. Define $\mathcal{N}_{h,n}=\{h\left( \cdot\right) =R\left( \cdot\right) ^{\prime}\gamma$: $\gamma\in \mathcal{N}_{\gamma,n}\}$ and $\mathcal{N}_{g,n}=\{g\left( \cdot\right) =P\left( \cdot\right) ^{\prime}\beta$: $\beta\in\mathcal{N}_{\beta,n}\}$. The following Lemma is useful to verify the high-level conditions. 
\begin{lemma} \label{AL-E-2} Under Assumptions \ref{AA-E-1}-\ref{AA-E-6}, we have (a)\ $\sup_{\gamma\in\mathcal{N}_{\gamma,n}}n^{-1}\sum_{i=1}^{n}\left[ \left\vert R(w_{1,i})^{\prime}\gamma-h_{o}(w_{1,i})\right\vert ^{2}\right] =O_{p}(\delta_{h,n}^{2})$; (b) $\sup_{\gamma\in\mathcal{N}_{\gamma,n}}\mathbb{E}\left[ \left\vert R(w_{1,i})^{\prime}\gamma-h_{o}(w_{1,i})\right\vert ^{2}\right] =O(\delta_{h,n}^{2})$; (c) $\sup_{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ \left\vert \tau(w_{h}% )-\tau(w)\right\vert \right] \leq C\zeta_{L}\delta_{h,n}$; (d) $\sup_{h\in\mathcal{N}_{h,n}}n^{-1}\sum\nolimits_{i=1}^{n}\left[ \left\vert \tau(w_{h,i})-\tau(w_{i})\right\vert \right] =O_{p}(\zeta _{L}\delta_{h,n})$; (e) $\mathbb{E}\left[ (v_{g_{n}}^{\ast}(w))^{2}\right] \leq C\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2}$; (f) $n^{-1}\sum_{i=1}^{n}(v_{g_{n}}^{\ast}(w_{i}))^{2}\left\Vert v_{n}^{\ast }\right\Vert _{sd}^{-2}=O_{p}(1)$; (g) $\sup_{w}\left\vert v_{g_{n}}^{\ast}(w)\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-1}\right\vert \leq C\xi_{0,K}$; (h) $\mathbb{E}\left[ \left\vert v_{\Gamma_{n}}^{\ast}(w_{1})\right\vert ^{2}\right] \leq C\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2}$. \end{lemma} \begin{proof} [Proof of Lemma \ref{AL-E-2}]Following Newey (1997) we assume without loss of generality that $Q_{L}=I_{L}$ and $Q_{K}=I_{K}$. Such an assumption can be verified under Assumption \ref{AA-E-2} for the power series and splines using the arguments in the proof of Theorem 4 and Theorem 7 of Newey (1997) respectively. 
(a) By Assumption \ref{AA-E-4}, Lemma \ref{AL-E-1}.(a), $Q_{L}=I_{L}$,\ the Cauchy-Schwarz inequality, the definition of $\mathcal{N}_{\gamma,n}$ and (\ref{AA-AP-1}),% \begin{align*} & \sup_{\gamma\in\mathcal{N}_{\gamma,n}}n^{-1}\sum_{i=1}^{n}\left[ \left\vert R(w_{1,i})^{\prime}\gamma-h_{o}(w_{1,i})\right\vert ^{2}\right] \\ & \leq2\sup_{\gamma\in\mathcal{N}_{\gamma,n}}n^{-1}\sum_{i=1}^{n}\left[ \left\vert R(w_{1,i})^{\prime}\gamma-h_{o,L}(w_{1,i})\right\vert ^{2}\right] +2\sup_{\gamma\in\mathcal{N}_{\gamma,n}}n^{-1}\sum_{i=1}^{n}\left[ \left\vert h_{o,L}(w_{1,i})-h_{o}(w_{1,i})\right\vert ^{2}\right] \\ & \leq2\sup_{\gamma\in\mathcal{N}_{\gamma,n}}(\gamma-\gamma_{o,L})^{\prime }\widehat{Q}_{n,L}(\gamma-\gamma_{o,L})+2CL^{-2s_{1}/d_{w_{1}}}\\ & \leq2\omega_{\max}(\widehat{Q}_{n,L})\sup_{\gamma\in\mathcal{N}_{\gamma,n}% }\left\Vert \gamma-\gamma_{o,L}\right\Vert ^{2}+2CL^{-2s_{1}/d_{w_{1}}}% =O_{p}(\delta_{h,n}^{2}), \end{align*} which proves the claim in (a). (b)\ The proof follows similar arguments to those in the proof of (a) and is omitted. 
(c) For any $h\left( \cdot\right) =R(\cdot)^{\prime}\gamma\in\mathcal{N}% _{h,n}$, \begin{align} \left\vert h(w_{1})-h_{o}(w_{1})\right\vert & \leq\left\vert R(w_{1}% )^{\prime}\gamma-h_{o,L}(w_{1})\right\vert +\left\vert h_{o,L}(w_{1}% )-h_{o}(w_{1})\right\vert \nonumber\\ & \leq\zeta_{L}\left\Vert \gamma-\gamma_{o,L}\right\Vert +CL^{-s_{1}% /d_{w_{1}}}\leq C\zeta_{L}\delta_{h,n} \label{PAL-E2-02}% \end{align} which implies that% \begin{align} \left\vert \tau(w_{h})-\tau(w)\right\vert & \leq\left\vert I\left\{ u\leq b+R(w_{1})^{\prime}\gamma-h_{o}(w_{1})\right\} -I\{u\leq b\} \right\vert \nonumber\\ & +\left\vert I\left\{ u\geq a+R(w_{1})^{\prime}\gamma-h_{o}(w_{1})\right\} -I\{u\geq a\} \right\vert \nonumber\\ & \leq I\left\{ \left\vert u-b\right\vert \leq\left\vert R(w_{1})^{\prime }\gamma-h_{o}(w_{1})\right\vert \right\} \nonumber\\ & +I\left\{ \left\vert u-a\right\vert \leq\left\vert R(w_{1})^{\prime}% \gamma-h_{o}(w_{1})\right\vert \right\} \nonumber\\ & \leq I\left\{ \left\vert u-b\right\vert \leq C\zeta_{L}\delta _{h,n}\right\} +I\left\{ \left\vert u-a\right\vert \leq C\zeta_{L}% \delta_{h,n}\right\} , \label{PAL-E2-2}% \end{align} where $\zeta_{L}\delta_{h,n}=o(1)$ by Assumption \ref{AA-E-7}. As the density of $u$ is bounded in the local neighborhoods of $a$ and $b$ (which is assumed in Lemma A3 of Newey, Powell and Vella (1999)), by (\ref{PAL-E2-2}) we get \begin{equation} \mathbb{E}\left[ \sup_{h\in\mathcal{N}_{h,n}}\left\vert \tau(w_{h}% )-\tau(w)\right\vert \right] \leq C\zeta_{L}\delta_{h,n} \label{PAL-E2-3}% \end{equation} which finishes the proof. (d) By (\ref{PAL-E2-3}) and the Markov inequality we immediately get the asserted result. 
(e)\ By the definition of $\eta$ and Assumption \ref{AA-E-5}, $\mathbb{E}% \left[ \eta^{2}|x,w_{1}\right] \geq C_{\eta}$ where $C_{\eta}$ is a finite positive constant.\ Thus \begin{align} \mathbb{E}[(v_{g_{n}}^{\ast}(w))^{2}\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-2}] & =\frac{\mathbb{E}[(v_{g_{n}}^{\ast}(w))^{2}]}{\mathbb{E}% \left[ u^{2}(v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}\right] +\mathbb{E}\left[ \eta^{2}\tau(w)(v_{g_{n}}^{\ast}(w))^{2}\right] }\nonumber\\ & \leq\frac{\mathbb{E}[(v_{g_{n}}^{\ast}(w))^{2}]}{\mathbb{E}\left[ u^{2}(v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}\right] +C_{\eta}\mathbb{E}\left[ \tau(w)(v_{g_{n}}^{\ast}(w))^{2}\right] }\nonumber\\ & =\frac{\mathbb{E}[\tau(w)(v_{g_{n}}^{\ast}(w))^{2}]}{\mathbb{E}\left[ u^{2}(v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}\right] +C_{\eta}\mathbb{E}\left[ \tau(w)(v_{g_{n}}^{\ast}(w))^{2}\right] }\leq C_{\eta}^{-1} \label{PAL-E2-4}% \end{align} where the second equality is by the definition of $v_{g_{n}}^{\ast}$ and $\tau(w)^{2}=\tau(w)$. (f) The asserted result follows by (e) and the Markov inequality. 
(g) By the Cauchy-Schwarz inequality and Assumption \ref{AA-E-5}, \begin{align} \left\vert v_{g_{n}}^{\ast}(w)\right\vert ^{2}\left\Vert v_{n}^{\ast }\right\Vert _{sd}^{-2} & =\frac{\left\vert \tau(w)P(w)^{\prime}Q_{K}% ^{-1}\rho(P_{K})\right\vert ^{2}}{\mathbb{E}\left[ u^{2}(v_{\Gamma_{n}}% ^{\ast}(w_{1}))^{2}\right] +\mathbb{E}\left[ \eta^{2}\tau(w)(v_{g_{n}}% ^{\ast}(w))^{2}\right] }\nonumber\\ & \leq\frac{\rho(P_{K})^{\prime}Q_{K}^{-2}\rho(P_{K})\left\Vert \tau(w)P(w)\right\Vert ^{2}}{\mathbb{E}\left[ u^{2}(v_{\Gamma_{n}}^{\ast }(w_{1}))^{2}\right] +C_{\eta}\mathbb{E}\left[ \tau(w)(v_{g_{n}}^{\ast }(w))^{2}\right] }\nonumber\\ & \leq\frac{\xi_{0,K}^{2}\rho(P_{K})^{\prime}Q_{K}^{-2}\rho(P_{K}% )}{\mathbb{E}\left[ u^{2}(v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}\right] +C_{\eta }\mathbb{E}\left[ \tau(w)(v_{g_{n}}^{\ast}(w))^{2}\right] }\nonumber\\ & \leq\frac{\xi_{0,K}^{2}\omega_{\min}^{-1}(Q_{K})\mathbb{E}\left[ \tau(w)(v_{g_{n}}^{\ast}(w))^{2}\right] }{\mathbb{E}\left[ u^{2}% (v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}\right] +C_{\eta}\mathbb{E}\left[ \tau(w)(v_{g_{n}}^{\ast}(w))^{2}\right] }\nonumber\\ & \leq\xi_{0,K}^{2}\omega_{\min}^{-1}(Q_{K})C_{\eta}^{-1} \label{PAL-E2-5}% \end{align} for any $w$. This combined with $Q_{K}=I_{K}$ immediately proves the claim. (h) By Lemmas \ref{AL-E-0}.(b) and \ref{AL-E-0}.(d), \begin{equation} \mathbb{E}[\left\vert v_{\Gamma_{n}}^{\ast}(w_{1})\right\vert ^{2}\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-2}]\rightarrow\frac{\mathbb{E}\left[ (v_{\Gamma}^{\ast}(w_{1}))^{2}\right] }{\mathbb{E}\left[ \eta^{2}% \tau(w)(v_{g}^{\ast}(w))^{2}\right] +\mathbb{E}\left[ u^{2}(v_{\Gamma}% ^{\ast}(w_{1}))^{2}\right] } \label{PAL-E2-6}% \end{equation} as $K\rightarrow\infty$ and $L\rightarrow\infty$, where $v_{\Gamma}^{\ast }(w_{1})=\mathbb{E}\left[ \tau(w)v_{g}^{\ast}(w)\partial_{u}g_{o}% (w)|w_{1}\right] $. 
By Assumption \ref{AA-E-5} and Jensen's inequality,% \begin{equation} \mathbb{E}\left[ (v_{\Gamma}^{\ast}(w_{1}))^{2}\right] \leq C\mathbb{E}% \left[ (\mathbb{E}\left[ \tau(w)v_{g}^{\ast}(w)|w_{1}\right] )^{2}\right] \leq C\mathbb{E}\left[ \tau(w)(v_{g}^{\ast}(w))^{2}\right] . \label{PAL-E2-7}% \end{equation} By Assumption \ref{AA-E-5}, $\mathbb{E}\left[ \eta^{2}|x,w_{1}\right] \geq C_{\eta}$ where $C_{\eta}$ is a finite positive constant, which together with (\ref{PAL-E2-7}) implies that \begin{align} & \frac{\mathbb{E}\left[ \tau(w)(v_{g}^{\ast}(w))^{2}\right] }% {\mathbb{E}\left[ \eta^{2}\tau(w)(v_{g}^{\ast}(w))^{2}\right] +\mathbb{E}% \left[ u^{2}(v_{\Gamma}^{\ast}(w_{1}))^{2}\right] }\nonumber\\ & \leq\frac{\mathbb{E}\left[ \tau(w)(v_{g}^{\ast}(w))^{2}\right] }{C_{\eta }\mathbb{E}\left[ \tau(w)(v_{g}^{\ast}(w))^{2}\right] +\mathbb{E}\left[ u^{2}(v_{\Gamma}^{\ast}(w_{1}))^{2}\right] }\leq C_{\eta}^{-1}. \label{PAL-E2-8}% \end{align} The asserted claim follows from (\ref{PAL-E2-6}) and (\ref{PAL-E2-8}). \end{proof} \bigskip \begin{lemma} \label{AL-E-3} Define $\mathcal{F}_{1,n}=\{(x,w_{1})\mapsto\partial_{u}% g_{o}(w)\tau(w_{h})(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w):h\in \mathcal{N}_{h,n}\}$. Then the uniform entropy numbers of $\mathcal{F}_{1,n}$ satisfies% \begin{equation} \sup_{Q}N(\varepsilon\left\Vert F_{1,n}\right\Vert _{Q,2},\mathcal{F}% _{1,n},L_{2}(Q))\leq(C/\varepsilon)^{CL}\ \text{for any }\varepsilon\in(0,1], \label{P-SQRL1-1}% \end{equation} where $C$ is a finite fixed constant, $Q$ ranges over all finitely-discrete probabilities measures and $F_{1,n}$ denotes the envelope of $\mathcal{F}% _{1,n}$. \end{lemma} \begin{proof} [Proof of Lemma \ref{AL-E-3}]Let $\tau(x,w_{2})=\prod\nolimits_{j=1}% ^{d_{w_{2}}+1}I\{a_{j}\leq w_{j}\leq b_{j}\}$, $a=a_{d_{w_{2}}+2}$ and $b=b_{d_{w_{2}}+2}$. Then by definition, \begin{equation} \tau(w_{h})=\tau(x,w_{2})I\{a\leq x-h(w_{1})\leq b\}. 
\label{AL-E3-0}% \end{equation} Define \begin{align} \mathcal{F}_{11,n} & =\{(x,w_{1})\mapsto I\{x\leq b+R(w_{1})^{\prime}% \gamma\}:\gamma\in\mathcal{N}_{\gamma,n}\};\label{AL-E3-1}\\ \mathcal{F}_{12,n} & =\{(x,w_{1})\mapsto I\{x\geq a+R(w_{1})^{\prime}% \gamma\}:\gamma\in\mathcal{N}_{\gamma,n}\};\label{AL-E3-2}\\ \mathcal{F}_{13,n} & =\{(x,w_{1})\mapsto\tau(x,w_{2})\partial_{u}% g_{o}(w)(R(w_{1})^{\prime}\gamma-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w):\gamma \in\mathcal{N}_{\gamma,n}\}. \label{AL-E3-3}% \end{align} Then by Lemmas 2.6.15 and 2.6.18 in van der Vaart and Wellner (1996), the VC-dimensions of $\mathcal{F}_{11,n}$, $\mathcal{F}_{12,n}$ and $\mathcal{F}% _{13,n}$ are of order $L$. By\ Theorem 2.6.7 in van der Vaart and Wellner (1996), the uniform entropy number of $\mathcal{F}_{1j,n}$ satisfies \begin{equation} \sup_{Q}N(\varepsilon\left\Vert F_{1j,n}\right\Vert _{Q,2},\mathcal{F}% _{1j,n},L_{2}(Q))\leq(C/\varepsilon)^{CL}\ \text{for any }\varepsilon\in(0,1], \label{AL-E3-4}% \end{equation} where $C$ is a universal constant and $F_{1j,n}$ denotes the envelope of $\mathcal{F}_{1j,n}$ for $j=1,2,3$.\ Because \begin{equation} \mathcal{F}_{1,n}\subset\{f_{1}f_{2}f_{3}:f_{1}\in\mathcal{F}_{11,n},f_{2}% \in\mathcal{F}_{12,n},f_{3}\in\mathcal{F}_{13,n}\}, \label{AL-E3-5}% \end{equation} by (A.6) and (A.7) in Andrews (1994), \begin{align} & \sup_{Q}N(\varepsilon\left\Vert F_{11,n}F_{12,n}F_{13,n}\right\Vert _{Q,2},\mathcal{F}_{1,n},L_{2}(Q))\nonumber\\ & \leq\prod\nolimits_{j=1}^{3}\sup_{Q}N(\varepsilon\left\Vert F_{1j,n}% \right\Vert _{Q,2}/3,\mathcal{F}_{1j,n},L_{2}(Q))\leq(C/\varepsilon)^{CL} \label{AL-E3-6}% \end{align} where the second inequality is by (\ref{AL-E3-4}). This proves (\ref{P-SQRL1-1}) with $F_{1,n}=F_{11,n}F_{12,n}F_{13,n}$. 
\end{proof} \bigskip \begin{lemma} \label{AL-E-4} Define $\mathcal{F}_{2,n}=\{(x,w_{1})\mapsto\tau(w_{h}% )u_{g_{n}}^{\ast}(w)P(w)^{\prime}\alpha:h\in\mathcal{N}_{h,n}$, $\alpha \in\mathbb{S}^{K-1}\}$, where $\mathbb{S}^{K-1}=\{ \alpha\in\mathbb{R}% ^{K}:\alpha^{\prime}\alpha=1\}$. Then the uniform entropy numbers of $\mathcal{F}_{2,n}$ satisfies% \begin{equation} \sup_{Q}N(\varepsilon\left\Vert F_{2,n}\right\Vert _{Q,2},\mathcal{F}% _{2,n},L_{2}(Q))\leq(C/\varepsilon)^{C(L+K)}\ \text{for any }\varepsilon \in(0,1], \label{P-SQRL1-2}% \end{equation} where $C$ is a finite fixed constant, $Q$ ranges over all finitely-discrete probabilities measures and $F_{2,n}$ denotes the envelope of $\mathcal{F}% _{2,n}$. \end{lemma} \begin{proof} [Proof of Lemma \ref{AL-E-4}]Define% \begin{equation} \mathcal{F}_{21,n}=\{(x,w_{1})\mapsto\tau(x,w_{2})u_{g_{n}}^{\ast }(w)P(w)^{\prime}\alpha:\alpha\in\mathbb{S}^{K-1}\}, \label{AL-E4-1}% \end{equation} where $\tau(x,w_{2})$ is defined in the proof of Lemma \ref{AL-E-3}. Then by Lemmas 2.6.15 and 2.6.18 in van der Vaart and Wellner (1996), the VC-dimension of $\mathcal{F}_{21,n}$ is of order $K$. By\ Theorem 2.6.7 in van der Vaart and Wellner (1996), the uniform entropy number of $\mathcal{F}_{21,n}$ satisfies \begin{equation} \sup_{Q}N(\varepsilon\left\Vert F_{21,n}\right\Vert _{Q,2},\mathcal{F}% _{21,n},L_{2}(Q))\leq(C/\varepsilon)^{CK}\ \text{for any }\varepsilon\in(0,1], \label{AL-E4-2}% \end{equation} where $C$ is a universal constant and $F_{21,n}$ denotes the envelope of $\mathcal{F}_{21,n}$. The rest of the proof is the same as Lemma \ref{AL-E-3}, because \begin{equation} \mathcal{F}_{2,n}\subset\{f_{1}f_{2}f_{3}:f_{1}\in\mathcal{F}_{11,n},f_{2}% \in\mathcal{F}_{12,n},f_{3}\in\mathcal{F}_{21,n}\}, \label{AL-E4-3}% \end{equation} where $\mathcal{F}_{11,n}$ and $\mathcal{F}_{12,n}$ are defined in (\ref{AL-E3-1}) and (\ref{AL-E3-2}) respectively. Hence (\ref{P-SQRL1-2}) holds with $F_{2,n}=F_{11,n}F_{12,n}F_{21,n}$. 
\end{proof} \bigskip \begin{lemma} \label{AL-E-5} Under Assumptions \ref{AA-E-1}-\ref{AA-E-7}, \[ \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ \tau(w_{h}% )(g_{o}(w)-g_{o}(w_{h}))u_{g_{n}}^{\ast}(w)\right\} \right\vert =o_{p}(n^{-1/2}). \] \end{lemma} \begin{proof} [Proof of Lemma \ref{AL-E-5}]Let $u_{h}=x-h(w_{1})$. As $u=x-h_{o}(w_{1})$, we have $u-u_{h}=h(w_{1})-h_{o}(w_{1})$ by definition. By\ Assumption\ \ref{AA-E-5}, \begin{equation} \left\vert g_{o}(w)-g_{o}(w_{h})-\partial_{u}g_{o}(w)(h(w_{1})-h_{o}% (w_{1}))\right\vert \leq C\left\vert h(w_{1})-h_{o}(w_{1})\right\vert ^{2} \label{AL-E5-1}% \end{equation} which together with the triangle inequality, Lemmas \ref{AL-E-2}.(a), \ref{AL-E-2}.(b)\ and \ref{AL-E-2}.(g) implies that \begin{align} & \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ \tau(w_{h}% )(g_{o}(w)-g_{o}(w_{h})-\partial_{u}g_{o}(w)(h(w_{1})-h_{o}(w_{1})))u_{g_{n}% }^{\ast}(w)\right\} \right\vert \nonumber\\ & \leq C\sup_{h\in\mathcal{N}_{h,n}}n^{-1}\sum_{i=1}^{n}\left[ \left\vert h(w_{1,i})-h_{o}(w_{1,i})\right\vert ^{2}|u_{g_{n}}^{\ast}(w_{i})|\right] \nonumber\\ & +C\sup_{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ \left\vert h(w_{1}% )-h_{o}(w_{1})\right\vert ^{2}|u_{g_{n}}^{\ast}(w)|\right] \nonumber\\ & \leq C\sup_{w}\left\vert u_{g_{n}}^{\ast}(w)\right\vert \sup_{h\in \mathcal{N}_{h,n}}n^{-1}\sum_{i=1}^{n}\left[ \left\vert h(w_{1,i}% )-h_{o}(w_{1,i})\right\vert ^{2}\right] \nonumber\\ & +C\sup_{w}\left\vert u_{g_{n}}^{\ast}(w)\right\vert \sup_{h\in \mathcal{N}_{h,n}}\mathbb{E}\left[ \left\vert h(w_{1,i})-h_{o}(w_{1,i}% )\right\vert ^{2}\right] \overset{}{=}O_{p}(\xi_{0,K}\delta_{h,n}^{2}). \label{AL-E5-2}% \end{align} By Assumption \ref{AA-E-7}, $\xi_{0,K}\delta_{h,n}^{2}=o(n^{-1/2})$. Hence by (\ref{AL-E5-2}) we have% \begin{equation} \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ \tau(w_{h}% )(g_{o}(w)-g_{o}(w_{h})-\partial_{u}g_{o}(w)(h(w_{1})-h_{o}(w_{1})))u_{g_{n}% }^{\ast}(w)\right\} \right\vert =o_{p}(n^{-1/2}). 
\label{AL-E5-3}% \end{equation} We next show that \begin{equation} \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ \tau(w_{h})\partial _{u}g_{o}(w)(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w)\right\} \right\vert =o_{p}(n^{-1/2}). \label{AL-E5-4}% \end{equation} Let $\mathcal{F}_{1,n}=\{(x,w_{1})\mapsto\partial_{u}g_{o}(w)\tau (w_{h})(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w):h\in\mathcal{N}_{h,n}% \}$.\ By Assumption \ref{AA-E-5}, Lemmas \ref{AL-E-2}.(b)\ and \ref{AL-E-2}% .(g),% \begin{align} \sup_{f\in\mathcal{F}_{1,n}}\mathbb{E}\left[ f^{2}\right] & =\sup _{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ (\partial_{u}g_{o}(w)\tau (w_{h})(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w))^{2}\right] \nonumber\\ & \leq C\sup_{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ ((h(w_{1})-h_{o}% (w_{1}))u_{g_{n}}^{\ast}(w))^{2}\right] \nonumber\\ & \leq C\sup_{w}\left\vert u_{g_{n}}^{\ast}(w)\right\vert ^{2}\sup _{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ (h(w_{1})-h_{o}(w_{1}))^{2}\right] \leq C\xi_{0,K}^{2}\delta_{h,n}^{2}. \label{AL-E5-5}% \end{align} Moreover, by the definition of $\mathcal{N}_{\gamma,n}$, (\ref{AA-AP-1}), Assumption \ref{AA-E-5}, Lemmas \ref{AL-E-2}.(b)\ and \ref{AL-E-2}.(g), \begin{align} & \sup_{h\in\mathcal{N}_{h,n}}\left\vert \partial_{u}g_{o}(w)\tau (w_{h})(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w)\right\vert \nonumber\\ & \leq C\sup_{h\in\mathcal{N}_{h,n}}\left\vert (h(w_{1})-h_{o}(w_{1}% ))u_{g_{n}}^{\ast}(w)\right\vert \nonumber\\ & \leq C\sup_{w}\left\vert u_{g_{n}}^{\ast}(w)\right\vert \sup_{h\in \mathcal{N}_{h,n}}\left[ \left\vert h(w_{1})-h_{o,L}(w_{1})\right\vert +\left\vert h_{o,L}(w_{1})-h_{o}(w_{1})\right\vert \right] \nonumber\\ & \leq C\sup_{w}\left\vert u_{g_{n}}^{\ast}(w)\right\vert \sup_{\gamma \in\mathcal{N}_{\gamma,n}}\left[ \xi_{0,K}\left\Vert \gamma-\gamma _{o,L}\right\Vert +CL^{-s_{1}/d_{w_{1}}}\right] \leq C\xi_{0,K}^{2}% \delta_{h,n}. 
\label{AL-E5-6}% \end{align} By Assumption \ref{AA-E-7},% \begin{equation} L\xi_{0,K}^{2}\delta_{h,n}^{2}\log(n)+\xi_{0,K}^{2}\delta_{h,n}L^{2}% (\log(n))^{2}n^{-1}=o(1). \label{AL-E5-7}% \end{equation} Collecting the results in Lemma \ref{AL-E-3}, (\ref{AL-E5-5}), (\ref{AL-E5-6}) and (\ref{AL-E5-7}), we can use Lemma 22 of Belloni et al.\ (2016) to show that \begin{equation} \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ \partial_{u}% g_{o}(w)\tau(w_{h})(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w)\right\} \right\vert =o_{p}(n^{-1/2}). \label{AL-E5-8}% \end{equation} The asserted result follows by (\ref{AL-E5-3}), (\ref{AL-E5-8}) and the triangle inequality. \end{proof} \bigskip \begin{lemma} \label{AL-E-6} Under Assumptions \ref{AA-E-1}-\ref{AA-E-7}, \[ \sup_{h\in\mathcal{N}_{h,n},g\in\mathcal{N}_{g,n}}\left\vert \mu_{n}\left\{ \tau(w_{h})(g_{o}(w_{h})-g(w_{h}))u_{g_{n}}^{\ast}\right\} \right\vert =o_{p}(n^{-1/2}). \] \end{lemma} \begin{proof} [Proof of Lemma \ref{AL-E-6}]By the triangle inequality, (\ref{AA-AP-2}), Lemmas \ref{AL-E-2}.(e)-(f) \begin{align} & \sup_{h\in\mathcal{N}_{h,n},g\in\mathcal{N}_{g,n}}\left\vert \mu _{n}\left\{ \tau(w_{h})(g_{o}(w_{h})-g_{o,K}(w_{h}))u_{g_{n}}^{\ast }(w)\right\} \right\vert \nonumber\\ & \leq CK^{-s/d}n^{-1}\sum_{i=1}^{n}\left[ \left\vert u_{g_{n}}^{\ast}% (w_{i})\right\vert +\mathbb{E}\left[ \left\vert u_{g_{n}}^{\ast}% (w_{i})\right\vert \right] \right] =o_{p}(n^{-1/2}), \label{AL-E6-1}% \end{align} where the equality is by Assumption \ref{AA-E-7}.\ By the first order expansion and the Cauchy-Schwarz inequality, for any $g\in\mathcal{N}_{g,n}$,% \begin{align} & \left\vert \tau(w_{h})(g_{o,K}(w_{h})-g(w_{h})-g_{o,K}(w)+g(w))u_{g_{n}% }^{\ast}(w)\right\vert \nonumber\\ & =\left\vert \tau(w_{h})(\beta-\beta_{o,K})^{\prime}(P(w_{h})-P(w))u_{g_{n}% }^{\ast}(w)\right\vert \nonumber\\ & \leq\xi_{1,K}\left\Vert \beta-\beta_{o,K}\right\Vert \left\vert u_{g_{n}% }^{\ast}(w)(h(w_{1})-h_{o}(w_{1}))\right\vert \label{AL-E6-2}% 
\end{align} which together with the definition of $\mathcal{N}_{h,n}$, the triangle inequality and Lemmas \ref{AL-E-2}.(a) and \ref{AL-E-2}.(f)\ implies that% \begin{align} & \sup_{h\in\mathcal{N}_{h,n},g\in\mathcal{N}_{g,n}}n^{-1}\sum_{i=1}% ^{n}\left\vert \tau(w_{i,h})(g_{o,K}(w_{i,h})-g(w_{i,h})-g_{o,K}% (w_{i})+g(w_{i}))u_{g_{n}}^{\ast}(w_{i})\right\vert \nonumber\\ & \leq\xi_{1,K}\delta_{g,n}\sup_{h\in\mathcal{N}_{h,n}}n^{-1}\sum_{i=1}% ^{n}\left\vert u_{g_{n}}^{\ast}(w_{i})(h(w_{1,i})-h_{o}(w_{1,i}))\right\vert \nonumber\\ & \leq\xi_{1,K}\delta_{g,n}\sup_{h\in\mathcal{N}_{h,n}}\left( n^{-1}% \sum_{i=1}^{n}\left\vert u_{g_{n}}^{\ast}(w_{i})\right\vert ^{2}n^{-1}% \sum_{i=1}^{n}\left\vert h(w_{1,i})-h_{o}(w_{1,i})\right\vert ^{2}\right) ^{1/2}\nonumber\\ & =O_{p}(\xi_{1,K}\delta_{g,n}\delta_{h,n})=o_{p}(n^{-1/2}) \label{AL-E6-3}% \end{align} where the equality is by Assumption \ref{AA-E-7}. Similarly, we can show that \begin{equation} \sup_{h\in\mathcal{N}_{h,n},g\in\mathcal{N}_{g,n}}\mathbb{E}\left[ \left\vert \tau(w_{i,h})(g_{o,K}(w_{i,h})-g(w_{i,h})-g_{o,K}(w_{i})+g(w_{i}))u_{g_{n}% }^{\ast}(w_{i})\right\vert \right] =o(n^{-1/2}), \label{AL-E6-4}% \end{equation} which together with (\ref{AL-E6-3}) implies that \begin{equation} \sup_{h\in\mathcal{N}_{h,n},g\in\mathcal{N}_{g,n}}\left\vert \mu_{n}\left\{ \tau(w_{h})(g_{o,K}(w_{h})-g(w_{h})-g_{o,K}(w)+g(w))u_{g_{n}}^{\ast }(w)\right\} \right\vert =o_{p}(n^{-1/2}). \label{AL-E6-5}% \end{equation} Recall that $\mathcal{F}_{2,n}=\{(x,w_{1})\mapsto\tau(w_{h})u_{g_{n}}^{\ast }(w)P(w)^{\prime}\alpha:h\in\mathcal{N}_{h,n}$, $\alpha\in\mathbb{S}^{K-1}\}$, where $\mathbb{S}^{K-1}=\{ \alpha\in\mathbb{R}^{K}:\alpha^{\prime}\alpha=1\}$. 
By Lemma \ref{AL-E-2}.(g) and $\tau(w)^{2}=\tau(w)$, \begin{align} \sup_{f\in\mathcal{F}_{2,n}}\mathbb{E}\left[ f^{2}\right] & =\sup _{h\in\mathcal{N}_{h,n},\alpha\in\mathbb{S}^{K-1}}\mathbb{E}\left[ (\tau(w_{h})u_{g_{n}}^{\ast}(w)P(w)^{\prime}\alpha)^{2}\right] \nonumber\\ & \leq\sup_{w}(u_{g_{n}}^{\ast}(w))^{2}\sup_{\alpha\in\mathbb{S}^{K-1}% }\mathbb{E}\left[ (\tau(w)P(w)^{\prime}\alpha)^{2}\right] \leq C\xi _{0,K}^{2}. \label{AL-E6-6}% \end{align} Similarly,% \begin{equation} \sup_{h\in\mathcal{N}_{h,n},\alpha\in\mathbb{S}^{K-1}}\left\vert \tau (w_{h})u_{g_{n}}^{\ast}(w)P(w)^{\prime}\alpha\right\vert \leq\sup_{\alpha \in\mathbb{S}^{K-1}}\left\vert u_{g_{n}}^{\ast}(w)P(w)^{\prime}\alpha \right\vert \leq C\xi_{0,K}^{2}. \label{AL-E6-7}% \end{equation} Collecting the results in Lemma \ref{AL-E-4}, (\ref{AL-E6-6}) and (\ref{AL-E6-7}), we can use Lemma 22 of Belloni et al.\ (2016) to show that \begin{equation} \sup_{h\in\mathcal{N}_{h,n},\alpha\in\mathbb{S}^{K-1}}\left\vert \mu _{n}\left\{ \tau(w_{h})u_{g_{n}}^{\ast}(w)P(w)^{\prime}\alpha\right\} \right\vert =O_{p}((L+K)^{1/2}\xi_{0,K}(\log(n))^{1/2}n^{-1/2}). \label{AL-E6-8}% \end{equation} By the definition of $\mathcal{N}_{g,n}$ and (\ref{AL-E6-8}), \begin{align} & \sup_{h\in\mathcal{N}_{h,n},g\in\mathcal{N}_{g,n}}\left\vert \mu _{n}\left\{ \tau(w_{h})(g_{o,K}(w)-g(w))u_{g_{n}}^{\ast}(w)\right\} \right\vert \nonumber\\ & \leq\sup_{h\in\mathcal{N}_{h,n},\alpha\in\mathbb{S}^{K-1}}\left\vert \mu_{n}\left\{ \tau(w_{h})u_{g_{n}}^{\ast}(w)P(w)^{\prime}\alpha\right\} \right\vert \sup_{\beta\in\mathcal{N}_{\beta,n}}\left\Vert \beta-\beta _{o,K}\right\Vert \nonumber\\ & =O_{p}(\delta_{g,n}(L+K)^{1/2}\xi_{0,K}(\log(n))^{1/2}n^{-1/2}% )=o_{p}(n^{-1/2}) \label{AL-E6-9}% \end{align} where the second equality is by Assumption \ref{AA-E-7}. Collecting the results in (\ref{AL-E6-1}), (\ref{AL-E6-5}) and (\ref{AL-E6-9}), and applying the triangle inequality, we immediately prove the asserted result. 
\end{proof} \section{Extra Simulation Results} In this section, we study the finite sample performance of the two-step nonparametric M estimator and the proposed inference method when the nonparametric regressor may have unbounded support.\ The simulated data is from the following model% \begin{align} y_{i} & =w_{1,i}\theta_{o}+m_{o}(h_{o}(x_{i}))+u_{i},\label{MC-1}\\ s_{i} & =h_{o}(x_{i})+\varepsilon_{i}, \label{MC-2}% \end{align} where $\theta_{o}=1$; $h_{o}(x)=2\cos(\pi x)$, $m_{o}(w_{2})=\sin(\pi w_{2})$ and $w_{2}=h_{o}(x)$.\ For $i=1,\ldots,n$, we independently draw $(w_{1,i},x_{\ast,i},u_{i},\varepsilon_{i})^{\prime}$ from $N(0,I_{4})$ and then calculate \begin{equation} x_{i}=2^{-1/2}(w_{1,i}+x_{\ast,i}). \label{MC-3A}% \end{equation} The data $\left\{ y_{i},s_{i},w_{1,i},x_{i}\right\} _{i=1}^{n}$ are generated using the equations in (\ref{MC-1}) and (\ref{MC-2}). The first-step and second-step nonparametric estimators and the consistent variance estimator take the same forms as their counterparts in Section 7 of HLR and hence are omitted here. We consider sample sizes $n=100$, $250$ and $500$ in this simulation study. For each sample size, we generate 10000 simulated samples to evaluate the performances of the two-step sieve estimator and the proposed inference procedure. For each simulated sample, we calculate the sieve estimator of $(\theta_{o},m_{o})$, and the 0.90 confidence interval of $\theta_{o}$ for each combination of $(L,K)$ where $L=2,\ldots,16$ and $K=2,\ldots,21$. The simulation results are reported in Figures 4.1 and 4.2.% %TCIMACRO{\TeXButton{B}{\begin{figure}[tbp] \centering}}% %BeginExpansion \begin{figure}[tbp] \centering %EndExpansion $% \begin{array} [c]{c}% \text{Figure 4.1. 
The Mean Squared Errors of the Two-step Sieve M Estimators of }m_{o}\text{ and }\theta_{o}\text{ (DGP2)}\\ \\% %TCIMACRO{\FRAME{itbpF}{5.7588in}{7.0621in}{0in}{}{}{dgp2_{m}se.png}% %{\special{ language "Scientific Word"; type "GRAPHIC"; display "USEDEF"; %valid_file "F"; width 5.7588in; height 7.0621in; depth 0in; %original-width 8.0004in; original-height 9.6997in; cropleft "0"; %croptop "1"; cropright "1"; cropbottom "0"; %filename 'graphics/DGP2_MSE.png';file-properties "XNPEU";}} }% %BeginExpansion {\includegraphics[ natheight=9.699700in, natwidth=8.000400in, height=7.0621in, width=5.7588in ]% {graphics/DGP2_MSE.png}% } %EndExpansion \end{array} $% %TCIMACRO{\TeXButton{caption}{\caption{\small %{1. The left panel represents the MSEs of the two-step sieve estimator of $m_{o}% %$ for sample sizes n=100, 250 and 500 respectively; 2. the right panel represents the MSEs of the two-step sieve estimator of $\theta %_{o}$ for sample sizes n=100, 250 and 500 respectively; 3. $L^{*}% %$ and $K^{*}% %$ denote the numbers of the series terms which produce sieve estimator of $m_{o}% %$ with the smallest finite sample MSE (in the left panel) or sieve estimator of $\theta %_{o}% %$ with the smallest finite sample MSE (in the left panel); 4. the dotted line represents the MSE of the two-step sieve M estimator with $L=L^{*}% %$ and $K=K^{*}% %$; 5. the solid line represents the MSE of the two-step sieve M estimator with $L$ and $K$ selected by 5-fold cross-validation.}% %}}}% %BeginExpansion \caption{\small {1. The left panel represents the MSEs of the two-step sieve estimator of $m_{o}% $ for sample sizes n=100, 250 and 500 respectively; 2. the right panel represents the MSEs of the two-step sieve estimator of $\theta _{o}$ for sample sizes n=100, 250 and 500 respectively; 3. 
$L^{*}% $ and $K^{*}% $ denote the numbers of the series terms which produce sieve estimator of $m_{o}% $ with the smallest finite sample MSE (in the left panel) or sieve estimator of $\theta _{o}% $ with the smallest finite sample MSE (in the right panel); 4. the dotted line represents the MSE of the two-step sieve M estimator with $L=L^{*}% $ and $K=K^{*}% $; 5. the solid line represents the MSE of the two-step sieve M estimator with $L$ and $K$ selected by 5-fold cross-validation.}% }% %EndExpansion% %TCIMACRO{\TeXButton{E}{\end{figure}}}% %BeginExpansion \end{figure}% %EndExpansion % %TCIMACRO{\TeXButton{B}{\begin{figure}[tbp] \centering}}% %BeginExpansion \begin{figure}[tbp] \centering %EndExpansion $% \begin{array} [c]{c}% \text{Figure 4.2. The Coverage Probability and the Average Length of the Confidence Interval of }\theta_{o}\text{ (DGP2)}\\ \\% %TCIMACRO{\FRAME{itbpF}{5.7769in}{7.0621in}{0in}{}{}{dgp2_{c}i.png}% %{\special{ language "Scientific Word"; type "GRAPHIC"; display "USEDEF"; %valid_file "F"; width 5.7769in; height 7.0621in; depth 0in; %original-width 8.1993in; original-height 9.6997in; cropleft "0"; %croptop "1"; cropright "1"; cropbottom "0"; %filename 'graphics/DGP2_CI.png';file-properties "XNPEU";}} }% %BeginExpansion {\includegraphics[ natheight=9.699700in, natwidth=8.199300in, height=7.0621in, width=5.7769in ]% {graphics/DGP2_CI.png}% } %EndExpansion \end{array} $% %TCIMACRO{\TeXButton{caption}{\caption{\small %{1. The left panel presents the coverage probability of the confidence interval of $\theta %_{o}% %$ for sample sizes n=100, 250 and 500 respectively; 2. the right panel presents the average length of the confidence interval of $\theta %_{o}% %$ for sample sizes n=100, 250 and 500 respectively; 3. the dotted line in the left panel is the 0.90 line which represents the nominal coverage of the confidence interval; 4. 
the solid line represents the coverage probability of the confidence interval based on the two-step sieve estimator with $K$ and $L$ selected by 5-fold cross-validation.}% %}}}% %BeginExpansion \caption{\small {1. The left panel presents the coverage probability of the confidence interval of $\theta _{o}% $ for sample sizes n=100, 250 and 500 respectively; 2. the right panel presents the average length of the confidence interval of $\theta _{o}% $ for sample sizes n=100, 250 and 500 respectively; 3. the dotted line in the left panel is the 0.90 line which represents the nominal coverage of the confidence interval; 4. the solid line represents the coverage probability of the confidence interval based on the two-step sieve estimator with $K$ and $L$ selected by 5-fold cross-validation.}% }% %EndExpansion% %TCIMACRO{\TeXButton{E}{\end{figure}}}% %BeginExpansion \end{figure}% %EndExpansion The properties of the two-step sieve M estimator and the proposed confidence interval are similar to what we found in the other DGP employed in HLR. We list some important differences. First, when the unknown function estimated in the first-step has unbounded support, the optimal $L$ which produces a two-step M estimator with the smallest MSE is much larger. Second, the ratio between the MSE of the cross-validated estimator of $m_{o}$ and the optimal MSE does not seem to converge to 1 in all the sample sizes we considered. However, the MSE of the cross-validated estimator of $\theta_{o}$ does approach the optimal value quickly as the sample size increases. Third, when $L$ is small (e.g., $L=4$), the proposed confidence interval over-covers the unknown parameter $\theta_{o}$ and its length diverges with increasing $K$. Fourth, the coverage probability of the confidence interval based on the cross-validated sieve estimator is almost identical to the nominal level even when the sample size is small (e.g., $n=100$). 
\section{Consistency and Convergence Rate\label{CR}} In this appendix, we first derive the consistency of the second-step sieve M estimator\textbf{ }$\widehat{g}_{n}$\textbf{ }under the metric\textbf{ }$\left\Vert \cdot\right\Vert _{\mathcal{G}}$ defined on $\mathcal{G}$. Given the consistency, we then focus on a local neighborhood of $g_{o}$ to calculate the convergence rate of $\widehat{g}_{n}$. Under mild conditions, the first-step sieve M estimator $\widehat{h}_{n}$ is consistent (see, e.g., Theorem 3.1 of Chen, 2007), and also has rate of convergence under a pseudo-metric $\left\Vert \cdot\right\Vert _{\mathcal{H}}$.\footnote{See, e.g., Shen and Wong (1994) and Chen and Shen (1998) for the convergence rate of the one-step (approximate) sieve M estimator for i.i.d. data and weakly dependent data respectively.} Let $\delta_{h,n}^{\ast}=O(\varepsilon_{1,n})$ be a small positive number that goes to zero as $n\rightarrow\infty$. Without loss of generality we denote $||\widehat{h}_{n}-h_{o}||_{\mathcal{H}}% =O_{p}(\delta_{h,n}^{\ast})$ as the convergence rate. Hence we can assume that $\widehat{h}_{n}$ belongs to a shrinking neighborhood\textbf{ }$\mathcal{N}% _{h,n}=\{h\in\mathcal{H}_{n}:\left\Vert h-h_{o}\right\Vert _{\mathcal{H}}% \leq\delta_{h,n}\}$\textbf{ }of\textbf{ }$h_{o}$\textbf{ }wpa1, where\textbf{ }$\delta_{h,n}=\delta_{h,n}^{\ast}\log(\log(n))=o(1)$. \subsection{Consistency of the second step sieve M estimation} The following conditions are sufficient for the consistency of\textbf{ }$\widehat{g}_{n}$\textbf{ }under $\left\Vert \cdot\right\Vert _{\mathcal{G}}$. 
\begin{assumption} \label{C1} (i)\ $\mathbb{E}\left[ \psi\left( Z_{2},g_{o},h_{o}\right) \right] >-\infty$ and if $\mathbb{E}\left[ \psi\left( Z_{2},g_{o}% ,h_{o}\right) \right] =\infty$, then $\mathbb{E}\left[ \psi\left( Z_{2},g,h_{o}\right) \right] <\infty$ for all $g\in\mathcal{G}_{n}% \backslash\left\{ g_{o}\right\} $ and for all $n\geq1$; (ii) for all $\varepsilon>0$, there exists some non-increasing positive sequence $c_{n}(\varepsilon)$ such that for all $n\geq1$ \begin{equation} \mathbb{E}\left[ \psi\left( Z_{2},g_{o},h_{o}\right) \right] -\sup_{\left\{ g\in\mathcal{G}_{n}:\text{ }||g-g_{o}||_{\mathcal{G}}% \geq\varepsilon\right\} }\mathbb{E}\left[ \psi\left( Z_{2},g,h_{o}\right) \right] \geq c_{n}(\varepsilon) \label{ID}% \end{equation} and $\lim\inf_{n}c_{n}(\varepsilon)>0$ for all $\varepsilon>0$. \end{assumption} Assumption \ref{C1} is the identification uniqueness condition for $g_{o}$. For sieve M estimation a similar condition can be found in White and Wooldridge (1991). This assumption is stronger than Condition 3.1 of Theorem 3.1 in Chen (2007) and Condition a of Lemma A.2 in Chen and Pouzo (2012), because it requires $c_{n}(\varepsilon)$ to be bounded away from zero for all large $n$. It essentially requires that the second step sieve M estimation is well-posed under the strong metric $\left\Vert \cdot\right\Vert _{\mathcal{G}% }$. \begin{assumption} \label{C2} (i) $g_{o}\in\mathcal{G}\ $and $\left\Vert \cdot\right\Vert _{\mathcal{G}}$ is a metric defined on $\mathcal{G}$ or some metric space containing $\mathcal{G}$; (ii) $\mathcal{G}_{n}\subset\mathcal{G}_{n+1}% \subset\mathcal{G}$ for all $n\geq1$ and there exists some $g_{n}% \in\mathcal{G}_{n}$ such that \begin{equation} \left\vert \mathbb{E}\left[ \psi(Z_{2},g_{n},h_{o})-\psi(Z_{2},g_{o}% ,h_{o})\right] \right\vert =O(\eta_{2,n}) \label{SIA}% \end{equation} where $\eta_{2,n}\ $is some finite positive non-increasing sequence. 
\end{assumption} Assumption \ref{C2} imposes conditions on the sieve spaces. It is essentially Condition b of Lemma A.2 in Chen and Pouzo (2012). It is also implied by Conditions 3.2 and 3.3 of Theorem 3.1 in Chen (2007). The condition in (\ref{SIA}) is clearly implied by the convergence rate of the sieve approximation error of $\left\Vert g_{n}-g_{o}\right\Vert _{s,2}$ and the continuity of the criterion function $\mathbb{E}\left[ \psi\left( Z_{2},g,h_{o}\right) \right] $ for all $g\in\mathcal{G}_{n}$ in the local neighborhood of $g_{o}$. In the following we denote $\mu_{n}\left[ \psi\left( Z_{2},g,h\right) \right] \equiv\frac{1}{n}\sum_{i=1}^{n}\left\{ \psi\left( Z_{2,i},g,h\right) -\mathbb{E}\left[ \psi\left( Z_{2}% ,g,h\right) \right] \right\} $. \begin{assumption} \label{C3} (i) $\sup_{g\in\mathcal{G}_{n},h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left[ \psi(Z_{2},g,h)\right] \right\vert =O_{p}(\eta_{0,n})$ where $\left\{ \eta_{0,n}\right\} \ $is some finite positive non-increasing sequence going to zero; (ii) there is a finite positive non-increasing sequence $\left\{ \eta_{1,n}\right\} $ going to zero such that% \[ \sup_{g\in\mathcal{G}_{n},h\in\mathcal{N}_{h,n}}\left\vert \mathbb{E}\left[ \psi(Z_{2},g,h)-\psi\left( Z_{2},g,h_{o}\right) \right] \right\vert =O(\eta_{1,n}). \] \end{assumption} Assumption \ref{C3} is similar to Condition 3.5 of Theorem 3.1 in Chen (2007) and the first part of Condition d of Lemma A.2 in Chen and Pouzo (2012). Assumption \ref{C3}.(i) can be verified by applying a standard empirical process result. Assumption \ref{C3}.(ii)\ can be verified by the convergence rate of the first-step sieve M estimator $\widehat{h}_{n}$ and the continuity of the criterion function $\mathbb{E}\left[ \psi\left( Z_{2},g,h\right) \right] $ in $h\in\mathcal{N}_{h,n}$ uniformly over $g\in\mathcal{G}_{n}$. \begin{theorem} \label{CST} Let Assumptions \ref{C1}, \ref{C2} and \ref{C3} hold. 
If% \begin{equation} \max\left\{ \eta_{0,n},\eta_{1,n},\eta_{2,n},\varepsilon_{2,n}^{2}\right\} =o(1) \label{RT}% \end{equation} then the second-step sieve M estimator is consistent under $\left\Vert \cdot\right\Vert _{\mathcal{G}}$, i.e. $\left\Vert \widehat{g}_{n}% -g_{o}\right\Vert _{\mathcal{G}}=o_{p}(1)$. \end{theorem} \begin{proof} [Proof of Theorem \ref{CST}]Let $Q_{n}\left( g,h\right) \equiv\frac{1}% {n}\sum\nolimits_{i=1}^{n}\psi\left( Z_{2,i},g,h\right) $ and $Q\left( g,h\right) \equiv\mathbb{E}\left[ \psi\left( Z_{2},g,h\right) \right] $. Let $I_{n}(\varepsilon)\equiv\Pr\left( \left\Vert \widehat{g}_{n}% -g_{o}\right\Vert _{\mathcal{G}}>\varepsilon\right) $. For any $\varepsilon >0$, by the definition of $\widehat{g}_{n}$, we have% \begin{equation} I_{n}(\varepsilon)\leq\Pr\left( \sup_{\left\{ g\in\mathcal{G}_{n}:\text{ }||g-g_{o}||_{\mathcal{G}}\geq\varepsilon\right\} }Q_{n}(g,\widehat{h}% _{n})\geq Q_{n}(g_{n},\widehat{h}_{n})-O_{p}\left( \varepsilon_{2,n}% ^{2}\right) \right) . \label{P-THM1-0}% \end{equation} Rewrite the inequality inside the parentheses on the RHS as \begin{equation} -\left[ Q_{n}(g_{n},\widehat{h}_{n})-Q\left( g_{o},h_{o}\right) \right] +O_{p}\left( \varepsilon_{2,n}^{2}\right) \geq Q\left( g_{o},h_{o}\right) -\sup_{\left\{ g\in\mathcal{G}_{n}:\text{ }||g-g_{o}||_{\mathcal{G}}% \geq\varepsilon\right\} }Q_{n}(g,\widehat{h}_{n}). 
\label{P-THM1-1}% \end{equation} Note that the first two terms on the LHS of the above inequality can be rewritten as \begin{align*} & -\left[ Q_{n}(g_{n},\widehat{h}_{n})-Q\left( g_{o},h_{o}\right) \right] \\ & =-\mu_{n}\left[ \psi(Z_{2},g_{n},\widehat{h}_{n})\right] -\left[ Q(g_{n},\widehat{h}_{n})-Q\left( g_{n},h_{o}\right) \right] -\left[ Q\left( g_{n},h_{o}\right) -Q\left( g_{o},h_{o}\right) \right] \end{align*} which implies that if $\widehat{h}_{n}\in\mathcal{N}_{h,n}$ with probability approaching 1 (wpa1), then \begin{equation} -\left[ Q_{n}(g_{n},\widehat{h}_{n})-Q\left( g_{o},h_{o}\right) \right] \leq I_{1,n}+I_{2,n}+I_{3,n}, \label{P-THM1-2}% \end{equation} where% \begin{align*} I_{1,n} & \equiv\sup_{g\in\mathcal{G}_{n},h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left[ \psi\left( Z_{2},g,h\right) \right] \right\vert ,\\ I_{2,n} & \equiv\sup_{g\in\mathcal{G}_{n},h\in\mathcal{N}_{h,n}}\left\vert Q\left( g,h\right) -Q\left( g,h_{o}\right) \right\vert ,\\ I_{3,n} & \equiv\left\vert Q(g_{n},h_{o})-Q(g_{o},h_{o})\right\vert . \end{align*} Similarly if $\widehat{h}_{n}\in\mathcal{N}_{h,n}$ wpa1, then for any $g\in\mathcal{G}_{n}$, \begin{align} Q_{n}(g,\widehat{h}_{n}) & =\mu_{n}\left[ \psi(Z_{2},g,\widehat{h}% _{n})\right] +\left[ Q(g,\widehat{h}_{n})-Q(g,h_{o})\right] +Q(g,h_{o}% )\nonumber\\ & \leq\sup_{g\in\mathcal{G}_{n},h\in\mathcal{N}_{h,n}}\left\vert \mu _{n}\left[ \psi\left( Z_{2},g,h\right) \right] \right\vert +\sup _{g\in\mathcal{G}_{n},h\in\mathcal{N}_{h,n}}\left\vert Q(g,h)-Q(g,h_{o}% )\right\vert +Q(g,h_{o})\nonumber\\ & =I_{1,n}+I_{2,n}+Q(g,h_{o}). 
\label{P-THM1-3}% \end{align} Therefore when $\widehat{h}_{n}\in\mathcal{N}_{h,n}$ wpa1, we may note that the term on the RHS of (\ref{P-THM1-1}) is such that \begin{align} & Q(g_{o},h_{o})-\sup_{\left\{ g\in\mathcal{G}_{n}:\text{ }||g-g_{o}% ||_{\mathcal{G}}\geq\varepsilon\right\} }Q_{n}(g,\widehat{h}_{n})\nonumber\\ & \geq-I_{1,n}-I_{2,n}+Q(g_{o},h_{o})-\sup_{\left\{ g\in\mathcal{G}% _{n}:\text{ }||g-g_{o}||_{\mathcal{G}}\geq\varepsilon\right\} }Q(g,h_{o}). \label{P-THM1-4}% \end{align} From (\ref{P-THM1-0}), (\ref{P-THM1-1}), (\ref{P-THM1-2}) and (\ref{P-THM1-4}% ), we get% \begin{equation} I_{n}(\varepsilon)\leq\Pr\left( 2\sum\limits_{j=1}^{3}I_{j,n}+O_{p}% (\varepsilon_{2,n}^{2})\geq Q(g_{o},h_{o})-\sup_{\left\{ g\in\mathcal{G}% _{n}:\text{ }||g-g_{o}||_{\mathcal{G}}\geq\varepsilon\right\} }% Q(g,h_{o})\right) +\Pr\left( \widehat{h}_{n}\notin\mathcal{N}_{h,n}\right) . \label{P-THM1-5}% \end{equation} If $Q(g_{o},h_{o})=\infty$, then using Assumption \ref{C1}.(i), we have\textbf{ }% \begin{equation} Q(g_{o},h_{o})-\sup_{\left\{ g\in\mathcal{G}_{n}:\text{ }||g-g_{o}% ||_{\mathcal{G}}\geq\varepsilon\right\} }Q(g,h_{o})=\infty. \label{P-THM1-6}% \end{equation} However, from Assumption \ref{C2}.(ii) and \ref{C3}, we get\textbf{ }% $\max\{I_{1,n},I_{2,n},I_{3,n}\}=O_{p}(1)$\textbf{, }which together with (\ref{P-THM1-5}), (\ref{P-THM1-6}), $\varepsilon_{2,n}=o(1)$ and the definition of\textbf{ }$\mathcal{N}_{h,n}$\textbf{ }implies that% \[ I_{n}(\varepsilon)\leq\Pr\left( \widehat{h}_{n}\notin\mathcal{N}% _{h,n}\right) \rightarrow0\text{ as }n\rightarrow\infty\text{.}% \] \textbf{ }On the other hand, if $Q(g_{o},h_{o})<\infty$, then using (\ref{P-THM1-5}) and Assumption \ref{C1}.(ii), we get \begin{equation} I_{n}(\varepsilon)\leq\Pr\left( \frac{2I_{1,n}+2I_{2,n}+2I_{3,n}% +O_{p}(\varepsilon_{2,n}^{2})}{c_{n}(\varepsilon)}\geq1\right) +\Pr\left( \widehat{h}_{n}\notin\mathcal{N}_{h,n}\right) . 
\label{P-THM1-7}% \end{equation} Assumption \ref{C1}.(ii), Assumption \ref{C2}.(ii), Assumption \ref{C3} and the condition (\ref{RT}) imply that% \[ \frac{2I_{1,n}+2I_{2,n}+2I_{3,n}+O_{p}(\varepsilon_{2,n}^{2})}{c_{n}% (\varepsilon)}=o_{p}(1) \] for any $\varepsilon>0$. Combining this result with (\ref{P-THM1-7}) and the definition of $\mathcal{N}_{h,n}$, we conclude that $I_{n}(\varepsilon )\rightarrow0$ as $n$ goes to infinity. This finishes the proof. \end{proof} \subsection{Rate of convergence of the second step sieve M estimation} After the consistency of the second-step sieve M estimator $\widehat{g}_{n}$ is established, we can focus on the local neighborhood of $g_{o}$ to compute the convergence rate of $\widehat{g}_{n}$ under $\left\Vert \cdot\right\Vert _{\mathcal{G}}$. Let $K_{2}$ be a generic finite and positive constant and define% \[ \mathcal{N}_{2,K_{2}}\equiv\left\{ g\in\mathcal{G}_{n}:||g-g_{o}% ||_{\mathcal{G}}\leq K_{2}\right\} , \] then by the consistency of $\widehat{g}_{n}$, we have $\widehat{g}_{n}% \in\mathcal{N}_{2,K_{2}}$ wpa1. Moreover, given the convergence rate $\delta_{h,n}^{\ast}$ of the first-step sieve M estimator $\widehat{h}_{n}$, we can define \[ \mathcal{N}_{1,K_{1}}\equiv\left\{ h\in\mathcal{H}_{n}:||h-h_{o}% ||_{\mathcal{H}}/\delta_{h,n}^{\ast}\leq K_{1}\right\} \] such that for any small constant $\omega>0$, there is a finite constant $K_{\omega}>0$ such that \begin{equation} \Pr(\widehat{h}_{n}\notin\mathcal{N}_{1,K_{\omega}})\leq\omega\text{ for all }n\text{.} \label{TIG}% \end{equation} The following general conditions are sufficient for deriving the convergence rate of $\widehat{g}_{n}$. 
\begin{assumption} \label{R1} There are some finite, positive and non-increasing sequences $\delta_{1,n}$, $\delta_{2,n}$ and $\delta_{n}$ that go to zero as $n\rightarrow\infty$ such that the following hold for any fixed finite constants $K_{1}>0,$ $K_{2}>0$:\ (i)% \begin{equation} \sup_{h\in\mathcal{N}_{1,K_{1}}}\left\vert \mathbb{E}\left[ \psi(Z_{2}% ,g_{n},h)-\psi(Z_{2},g_{o},h)\right] \right\vert =O(\delta_{2,n}^{2}); \label{AR1-0}% \end{equation} (ii)\ for any small constant $\delta,\widetilde{\delta}>0$ and for any $g\in\mathcal{N}_{2,K_{2}}$ with $0<\widetilde{\delta}<\left\Vert g-g_{o}\right\Vert _{\mathcal{G}}<\delta$ \begin{equation} \sup_{h\in\mathcal{N}_{1,K_{1}}}\mathbb{E}\left[ \psi\left( Z_{2}% ,g,h\right) -\psi\left( Z_{2},g_{o},h\right) \right] \leq c_{K_{1}% ,1}\delta_{1,n}\delta-c_{K_{1},2}\delta^{2}, \label{AR1-1}% \end{equation} where $c_{K_{1},1}\ $and $c_{K_{1},2}>0$ are finite constants only depending on $K_{1}$; (iii)% \begin{equation} \sup_{g\in\mathcal{N}_{2,K_{2}},h\in\mathcal{N}_{1,K_{1}}}\left\vert \mu _{n}\left[ \psi(Z,g,h)-\psi(Z,g,h_{o})\right] \right\vert =O_{p}(\delta _{n}^{2}); \label{AR1-2}% \end{equation} (iv) for all $n$ large enough and for any sufficiently small $\delta$, \begin{equation} \mathbb{E}\left[ \sup_{\left\{ g\in\mathcal{N}_{2,K_{2}}:\text{ }\left\Vert g-g_{o}\right\Vert _{\mathcal{G}}\leq\delta\right\} }\left\vert \mu _{n}\left[ \psi(Z,g,h_{o})-\psi(Z,g_{o},h_{o})\right] \right\vert \right] \leq\frac{c_{1}\phi_{n}(\delta)}{\sqrt{n}} \label{AR1-4}% \end{equation} where $c_{1}>0$ is some finite constant and $\phi_{n}(\cdot)$ is some function such that $\delta^{-\gamma}\phi_{n}(\delta)$ is a decreasing function for some $\gamma\in(0,2)$. \end{assumption} Assumption \ref{R1}.(i) imposes local smoothness condition on the function $\mathbb{E}\left[ \psi\left( Z_{2},\cdot,h\right) \right] $ uniformly over $h$ in some shrinking neighborhood. 
The rate $\delta_{2,n}$ is determined by the convergence rates of the sieve approximation error of $g_{o}$ and the first step sieve estimator $\widehat{h}_{n}$. Assumption \ref{R1}.(ii) is a local identification condition. The term $\delta_{1,n}$ on the right side of the inequality (\ref{AR1-1}) represents the effect of first-step estimation on the second-step sieve estimate $\widehat{g}_{n}$. In Assumption \ref{R1}.(i), (ii) and (iii), the uniform convergence is imposed over local neighborhoods $\mathcal{N}_{1,K_{1}}$ and/or $\mathcal{N}_{2,K_{2}}$. That is particularly useful for establishing the convergence rate of $\widehat{g}_{n}$, because by the consistency of $\widehat{g}_{n}$ and the convergence rate of $\widehat {h}_{n}$, we can bound the probabilities of the events $\{ \widehat{g}% _{n}\notin\mathcal{N}_{2,K_{2}}\}$ and $\{ \widehat{h}_{n}\notin \mathcal{N}_{1,K_{1}}\}$ in finite samples by choosing sufficiently large $K_{1}$ and $K_{2}$. Assumption \ref{R1}.(iv) is a stochastic equicontinuity condition which is similar to the one in Theorem 3.4.1 of Van der Vaart and Wellner (1996). \begin{theorem} \label{T-rate} Suppose that the conditions in Theorem \ref{CST} and Assumption \ref{R1} are satisfied. Furthermore, if $\left\Vert g_{n}-g_{o}\right\Vert _{\mathcal{G}}=O(\delta_{2,n}^{\ast})$\ where $\delta_{2,n}^{\ast}$ is defined below and there is a finite, positive and non-increasing sequence $\delta_{g,n}$ such that \begin{equation} \left( \delta_{g,n}\right) ^{-2}\phi_{n}(\delta_{g,n})\leq c_{2}\sqrt{n}, \label{DR1}% \end{equation} then we have $\left\Vert \widehat{g}_{n}-g_{o}\right\Vert _{\mathcal{G}}% =O_{p}\left( \delta_{2,n}^{\ast}\right) $, where $\delta_{2,n}^{\ast}% \equiv\max\left\{ \delta_{1,n},\delta_{2,n},\delta_{n},\delta_{g,n}% ,\varepsilon_{2,n}\right\} $. \end{theorem} \begin{proof} [Proof of Theorem \ref{T-rate}]Let $\omega>0$ be some arbitrarily small constant. 
Because $\widehat{g}_{n}$ is consistent, we can choose a sufficiently large constant $K_{M}>0$ such that% \begin{equation} \Pr\left( ||\widehat{g}_{n}-g_{o}||_{\mathcal{G}}>K_{M}\right) \leq\omega. \label{P-RT-1a}% \end{equation} By $\left\Vert g_{n}-g_{o}\right\Vert _{\mathcal{G}}=o(1)$, we deduce that there is some sufficiently large $K_{g_{o}}$ such that $||g_{n}-g_{o}% ||_{\mathcal{G}}\leq K_{g_{o}}$. Let $K_{M}^{\ast}=\max\{K_{M},K_{g_{o}}\}$,% \[ \mathcal{G}_{n}(M)\equiv\left\{ g\in\mathcal{G}_{n}:2^{M}\delta_{2,n}^{\ast }<||g-g_{o}||_{\mathcal{G}}\leq K_{M}^{\ast}\right\} \] and $I_{M,n}\left( \omega\right) \equiv\Pr\left( ||\widehat{g}_{n}% -g_{o}||_{\mathcal{G}}>2^{M}\delta_{2,n}^{\ast}\right) $. Note that by (\ref{P-RT-1a}), we have \begin{equation} I_{M,n}\left( \omega\right) =\Pr\left( \widehat{g}_{n}\in\mathcal{G}% _{n}\left( M\right) \right) +\Pr\left( ||\widehat{g}_{n}-g_{o}% ||_{\mathcal{G}}>K_{M}^{\ast}\right) \leq\Pr\left( \widehat{g}_{n}% \in\mathcal{G}_{n}\left( M\right) \right) +\omega. \label{P-RT-1b}% \end{equation} We will prove that% \begin{equation} I_{M,n}\left( \omega\right) \leq\sum_{j\geq M,2^{j-1}\delta_{2,n}^{\ast}\leq K_{M}^{\ast}}\frac{c_{1}c_{2}\left[ (2^{j+1})^{\gamma}+K_{\varepsilon }^{\gamma}\right] }{\left\vert c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}% \right\vert }+5\omega\label{P-RT-1c}% \end{equation} where $c_{1}$ and $c_{2}$ are defined in Assumption \ref{R1}.(iv) and (\ref{DR1}), $c_{K_{1},1}$, $c_{K_{1},2}$, $K_{\varepsilon}$ and $K$ are some fixed finite constants which may depend on $\omega$, and $\gamma\in(0,2)$ is defined in Assumption \ref{R1}.(iv). As $\gamma<2$, we can choose $M$ sufficiently large such that% \[ \sum_{j\geq M,2^{j-1}\delta_{2,n}^{\ast}\leq K_{M}^{\ast}}\frac{c_{1}% c_{2}\left[ (2^{j+1})^{\gamma}+K_{\varepsilon}^{\gamma}\right] }{\left\vert c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}\right\vert }<\omega, \] which together with (\ref{P-RT-1c}) implies that $I_{M,n}(\omega)\leq6\omega$. 
As we can let $\omega$ arbitrarily small, this would establish that $||\widehat{g}_{n}-g_{o}||_{\mathcal{G}}=O_{p}\left( \delta_{2,n}^{\ast }\right) $. Equation (\ref{P-RT-1c}) is established by combining (\ref{P-RT-2}), (\ref{P-RT-3}) and (\ref{P-RT-4}) below, which are proved in several steps. \noindent\underline{Step 1}:\ We prove that% \begin{equation} I_{M,n}(\omega)\leq\Pr\left( \sup_{g\in\mathcal{G}_{n}(M),h\in\mathcal{N}% _{1,K_{1}}}\left[ I_{1,n}(g,h_{o})+I_{2,n}(g,h)\right] +K\delta_{2,n}% ^{\ast2}\geq0\right) +5\omega\label{P-RT-2}% \end{equation} where\ $K_{1}$ is a fixed constant such that $\Pr\left( \widehat{h}_{n}% \notin\mathcal{N}_{1,K_{1}}\right) \leq\omega$ for all $n$,\ $K$ is some fixed constant defined below, \begin{align*} I_{1,n}\left( g,h_{o}\right) & \equiv\mu_{n}\left[ \psi(Z_{2}% ,g,h_{o})-\psi(Z_{2},g_{n},h_{o})\right] ,\\ \text{and }I_{2,n}\left( g,h\right) & \equiv Q(g,h)-Q(g_{o},h). \end{align*} For this purpose, we first note that by the definition of $\widehat{g}_{n}$, we can choose some sufficiently large constant $K_{1}>0$ such that \begin{equation} \Pr\left( Q_{n}(\widehat{g}_{n},\widehat{h}_{n})-Q_{n}(g_{n},\widehat{h}% _{n})+K_{1}\varepsilon_{2,n}^{2}<0\right) \leq\omega. \label{P-RT-2a}% \end{equation} Combining (\ref{P-RT-1b}) and (\ref{P-RT-2a}), we have \begin{equation} I_{M,n}\left( \omega\right) \leq\Pr\left( \sup_{g\in\mathcal{G}_{n}% (M)}Q_{n}(g,\widehat{h}_{n})-Q_{n}(g_{n},\widehat{h}_{n})+K_{1}\varepsilon _{2,n}^{2}\geq0\right) +2\omega. 
\label{P-RT-2b}% \end{equation} It is clear that the term inside the parentheses on the RHS of (\ref{P-RT-2b}) is such that% \begin{align*} & Q_{n}(g,\widehat{h}_{n})-Q_{n}(g_{n},\widehat{h}_{n})\\ & =\mu_{n}\left[ \psi(Z_{2},g,\widehat{h}_{n})-\psi(Z_{2},g_{n},\widehat {h}_{n})\right] +Q(g,\widehat{h}_{n})-Q(g_{n},\widehat{h}_{n})\\ & =\mu_{n}\left[ \psi(Z_{2},g,\widehat{h}_{n})-\psi\left( Z_{2}% ,g,h_{o}\right) \right] +\mu_{n}\left[ \psi\left( Z_{2},g_{n}% ,h_{o}\right) -\psi(Z_{2},g_{n},\widehat{h}_{n})\right] \\ & +\mu_{n}\left[ \psi\left( Z_{2},g,h_{o}\right) -\psi\left( Z_{2}% ,g_{n},h_{o}\right) \right] +Q(g,\widehat{h}_{n})-Q(g_{o},\widehat{h}_{n})\\ & +Q(g_{o},\widehat{h}_{n})-Q(g_{n},\widehat{h}_{n}), \end{align*} and therefore,% \begin{align} & Q_{n}(g,\widehat{h}_{n})-Q_{n}(g_{n},\widehat{h}_{n})\nonumber\\ & =\mu_{n}\left[ \psi(Z_{2},g,\widehat{h}_{n})-\psi\left( Z_{2}% ,g,h_{o}\right) \right] +\mu_{n}\left[ \psi\left( Z_{2},g_{n}% ,h_{o}\right) -\psi(Z_{2},g_{n},\widehat{h}_{n})\right] \nonumber\\ & +Q(g_{o},\widehat{h}_{n})-Q(g_{n},\widehat{h}_{n})+I_{1,n}\left( g,h_{o}\right) +I_{2,n}(g,\widehat{h}_{n}). \label{P-RT-2c}% \end{align} From Assumption \ref{R1}.(iii), we can choose some constant $K_{2}$ sufficiently large such that% \begin{align} & \Pr\left( \sup_{g\in\mathcal{G}_{n}(M)}\mu_{n}\left[ \psi(Z_{2}% ,g,\widehat{h}_{n})-\psi(Z_{2},g,h_{o})\right] \geq K_{2}\delta_{n}% ^{2},\widehat{h}_{n}\in\mathcal{N}_{1,K_{1}}\right) \nonumber\\ & \leq\Pr\left( \sup_{g\in\mathcal{N}_{2,K_{M}^{\ast}},h\in\mathcal{N}% _{1,K_{1}}}\left\vert \mu_{n}\left[ \psi(Z_{2},g,h)-\psi(Z_{2},g,h_{o}% )\right] \right\vert \geq K_{2}\delta_{n}^{2}\right) \leq\omega. 
\label{P-RT-2d}% \end{align} Combining (\ref{P-RT-2b}), (\ref{P-RT-2c}), and (\ref{P-RT-2d}), we obtain% \begin{equation} I_{M,n}\left( \omega\right) \leq\Pr\left[ \left( \begin{array} [c]{c}% \mu_{n}\left[ \psi\left( Z_{2},g_{n},h_{o}\right) -\psi(Z_{2}% ,g_{n},\widehat{h}_{n})\right] \\ +Q(g_{o},\widehat{h}_{n})-Q(g_{n},\widehat{h}_{n})\\ +\sup_{g\in\mathcal{G}_{n}(M)}\left[ I_{1,n}\left( g,h_{o}\right) +I_{2,n}(g,\widehat{h}_{n})\right] \\ +K_{1}\varepsilon_{2,n}^{2}+K_{2}\delta_{n}^{2}% \end{array} \right) \geq0,\widehat{h}_{n}\in\mathcal{N}_{1,K_{1}}\right] +4\omega. \label{P-RT-2b1}% \end{equation} By the definition of $\mathcal{N}_{2,K_{M}^{\ast}}$, we have $g_{n}% \in\mathcal{N}_{2,K_{M}^{\ast}}$, which together with Assumption \ref{R1}.(iii) implies that \begin{align} & \Pr\left( \mu_{n}\left[ \psi(Z_{2},g_{n},h_{o})-\psi(Z_{2},g_{n}% ,\widehat{h}_{n})\right] \geq K_{2}\delta_{n}^{2},\widehat{h}_{n}% \in\mathcal{N}_{1,K_{1}}\right) \nonumber\\ & \leq\Pr\left( \sup_{g\in\mathcal{N}_{2,K_{M}^{\ast}},h\in\mathcal{N}% _{1,K_{1}}}\left\vert \mu_{n}\left[ \psi(Z_{2},g,h_{o})-\psi(Z_{2}% ,g,h)\right] \right\vert \geq K_{2}\delta_{n}^{2}\right) \leq\omega. \label{P-RT-2e}% \end{align} By the same argument that led to (\ref{P-RT-2b1}), we obtain% \begin{equation} I_{M,n}\left( \omega\right) \leq\Pr\left[ \left( \begin{array} [c]{c}% Q(g_{o},\widehat{h}_{n})-Q(g_{n},\widehat{h}_{n})\\ +\sup_{g\in\mathcal{G}_{n}(M)}\left[ I_{1,n}\left( g,h_{o}\right) +I_{2,n}(g,\widehat{h}_{n})\right] \\ +K_{1}\varepsilon_{2,n}^{2}+2K_{2}\delta_{n}^{2}% \end{array} \right) \geq0,\widehat{h}_{n}\in\mathcal{N}_{1,K_{1}}\right] +5\omega. 
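\label{P-RT-2b2}% \end{equation} From Assumption \ref{R1}.(i), we can choose some constant $K_{3}$ sufficiently large such that% \[ \sup_{h\in\mathcal{N}_{1,K_{1}}}\left\vert \mathbb{E}\left[ \psi\left( Z_{2},g_{o},h\right) -\psi\left( Z_{2},g_{n},h\right) \right] \right\vert \leq K_{3}\delta_{2,n}^{2}. \] Hence, on the event $\{ \widehat{h}_{n}\in\mathcal{N}_{1,K_{1}}\}$, we have $Q(g_{o},\widehat{h}_{n})-Q(g_{n},\widehat{h}_{n})\leq K_{3}\delta_{2,n}^{2}$ and $\sup_{g\in\mathcal{G}_{n}(M)}\left[ I_{1,n}(g,h_{o})+I_{2,n}(g,\widehat{h}_{n})\right] \leq\sup_{g\in\mathcal{G}_{n}(M),h\in\mathcal{N}_{1,K_{1}}}\left[ I_{1,n}(g,h_{o})+I_{2,n}(g,h)\right] $. Because $\max\{ \varepsilon_{2,n},\delta_{n},\delta_{2,n}\} \leq\delta_{2,n}^{\ast}$, the inequality (\ref{P-RT-2}) follows from (\ref{P-RT-2b2}) with $K\equiv K_{1}+2K_{2}+K_{3}$, where $K_{1}$ in this sum denotes the constant in (\ref{P-RT-2a}). \noindent\underline{Step 2}:\ For any integer $j\geq M$, let $\mathcal{G}_{n,j}\equiv\left\{ g\in\mathcal{G}_{n}(M):2^{j}\delta_{2,n}^{\ast}<||g-g_{o}||_{\mathcal{G}}\leq2^{j+1}\delta_{2,n}^{\ast}\right\} $. We prove that \begin{equation} \Pr\left( \sup_{g\in\mathcal{G}_{n}(M),h\in\mathcal{N}_{1,K_{1}}}\left[ I_{1,n}(g,h_{o})+I_{2,n}(g,h)\right] +K\delta_{2,n}^{\ast2}\geq0\right) \leq\sum_{j\geq M,2^{j-1}\delta_{2,n}^{\ast}\leq K_{M}^{\ast}}\Pr\left( \sup_{g\in\mathcal{G}_{n,j}}I_{1,n}(g,h_{o})\geq\left\vert c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}\right\vert \delta_{2,n}^{\ast2}\right) . \label{P-RT-3} \end{equation} Because $\mathcal{G}_{n}(M)$ is covered by the sets $\mathcal{G}_{n,j}$ with $j\geq M$ and $2^{j-1}\delta_{2,n}^{\ast}\leq K_{M}^{\ast}$, the union bound implies that the LHS of (\ref{P-RT-3}) is bounded by the sum over $j$ of the probabilities of the events $\{ \sup_{g\in\mathcal{G}_{n,j},h\in\mathcal{N}_{1,K_{1}}}\left[ I_{1,n}(g,h_{o})+I_{2,n}(g,h)\right] +K\delta_{2,n}^{\ast2}\geq0\}$. By Assumption \ref{R1}.(ii) and $\delta_{1,n}\leq\delta_{2,n}^{\ast}$, we have (after adjusting the constants $c_{K_{1},1}$ and $c_{K_{1},2}$ if necessary) \[ \sup_{g\in\mathcal{G}_{n,j},h\in\mathcal{N}_{1,K_{1}}}I_{2,n}(g,h)\leq\left( c_{K_{1},1}2^{j}-c_{K_{1},2}2^{2j}\right) \delta_{2,n}^{\ast2}, \] and $c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}$ is positive for all $j\geq M$ when $M$ is sufficiently large, which proves (\ref{P-RT-3}). \noindent\underline{Step 3}:\ We prove that for any $j$ in the summation in (\ref{P-RT-3}), \begin{equation} \Pr\left( \sup_{g\in\mathcal{G}_{n,j}}I_{1,n}(g,h_{o})\geq\left\vert c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}\right\vert \delta_{2,n}^{\ast2}\right) \leq\frac{c_{1}c_{2}\left[ (2^{j+1})^{\gamma}+K_{\varepsilon}^{\gamma}\right] }{\left\vert c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}\right\vert }. \label{P-RT-4} \end{equation} Because $I_{1,n}(g,h_{o})=\mu_{n}\left[ \psi(Z_{2},g,h_{o})-\psi(Z_{2},g_{o},h_{o})\right] -\mu_{n}\left[ \psi(Z_{2},g_{n},h_{o})-\psi(Z_{2},g_{o},h_{o})\right] $, the Markov inequality and the triangle inequality imply that \begin{align} & \Pr\left( \sup_{g\in\mathcal{G}_{n,j}}I_{1,n}(g,h_{o})\geq\left\vert c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}\right\vert \delta_{2,n}^{\ast2}\right) \nonumber\\ & \leq\frac{\mathbb{E}\left[ \sup\limits_{g\in\mathcal{G}_{n,j}}\left\vert \mu_{n}\left[ \psi(Z_{2},g,h_{o})-\psi(Z_{2},g_{o},h_{o})\right] \right\vert \right] }{\left\vert c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}\right\vert \delta_{2,n}^{\ast2}}+\frac{\mathbb{E}\left[ \left\vert \mu_{n}\left[ \psi(Z_{2},g_{n},h_{o})-\psi(Z_{2},g_{o},h_{o})\right] \right\vert \right] }{\left\vert c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}\right\vert \delta_{2,n}^{\ast2}}. \label{P-RT-4a} \end{align} Using Assumption \ref{R1}.(iv), the monotonicity of $\delta^{-\gamma}\phi_{n}(\delta)$ and (\ref{DR1}), we deduce that \begin{align} & \frac{\mathbb{E}\left[ \sup\limits_{g\in\mathcal{G}_{n,j}}\left\vert \mu_{n}\left[ \psi(Z_{2},g,h_{o})-\psi(Z_{2},g_{o},h_{o})\right] \right\vert \right] }{\left\vert c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}\right\vert \delta_{2,n}^{\ast2}}\nonumber\\ & \leq\frac{c_{1}(2^{j+1}\delta_{2,n}^{\ast})^{\gamma}}{\sqrt{n}\left\vert c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}\right\vert \delta_{2,n}^{\ast2}}\frac{\phi_{n}(2^{j+1}\delta_{2,n}^{\ast})}{(2^{j+1}\delta_{2,n}^{\ast})^{\gamma}}\nonumber\\ & \leq\frac{c_{1}(2^{j+1})^{\gamma}}{\left\vert c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}\right\vert }\frac{\phi_{n}(\delta_{2,n}^{\ast})}{\sqrt{n}\delta_{2,n}^{\ast2}}\leq\frac{c_{1}c_{2}(2^{j+1})^{\gamma}}{\left\vert c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}\right\vert }, \label{P-RT-4b} \end{align} where the last inequality holds because $\delta_{2,n}^{\ast}\geq\delta_{g,n}$ and $\delta^{-2}\phi_{n}(\delta)$ is decreasing in $\delta$. Moreover, by $\left\Vert g_{n}-g_{o}\right\Vert _{\mathcal{G}}=O(\delta_{2,n}^{\ast})$, there is a constant $K_{\varepsilon}>1$ large enough such that $||g_{n}-g_{o}||_{\mathcal{G}}\leq K_{\varepsilon}\delta_{2,n}^{\ast}$. Using Assumption \ref{R1}.(iv) and similar arguments in showing (\ref{P-RT-4b}), we deduce that% \begin{align} & \frac{\mathbb{E}\left[ \left\vert \mu_{n}\left[ \psi(Z_{2},g_{n}% ,h_{o})-\psi(Z_{2},g_{o},h_{o})\right] \right\vert \right] }{\left\vert c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}\right\vert \delta_{2,n}^{\ast2}% }\nonumber\\ & \leq\frac{\mathbb{E}\left[ \sup\limits_{\left\{ g\in \mathcal{G}_{n}:||g-g_{o}||_{\mathcal{G}}\leq K_{\varepsilon}\delta _{2,n}^{\ast}\right\} }\left\vert \mu_{n}\left[ \psi(Z_{2},g,h_{o})-\psi(Z_{2}% ,g_{o},h_{o})\right] \right\vert \right] }{\left\vert c_{K_{1},2}% 2^{2j}-K-c_{K_{1},1}2^{j}\right\vert \delta_{2,n}^{\ast2}}\nonumber\\ & \leq\frac{c_{1}(K_{\varepsilon}\delta_{2,n}^{\ast})^{\gamma}}{\sqrt {n}\left\vert c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}\right\vert \delta _{2,n}^{\ast2}}\frac{\phi_{n}(K_{\varepsilon}\delta_{2,n}^{\ast}% )}{(K_{\varepsilon}\delta_{2,n}^{\ast})^{\gamma}}\nonumber\\ & \leq\frac{c_{1}K_{\varepsilon}^{\gamma}}{\left\vert c_{K_{1},2}% 2^{2j}-K-c_{K_{1},1}2^{j}\right\vert }\frac{\phi_{n}(\delta_{2,n}^{\ast}% )}{\sqrt{n}\delta_{2,n}^{\ast2}}\leq\frac{c_{1}c_{2}K_{\varepsilon}^{\gamma}% }{\left\vert c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}\right\vert }. \label{P-RT-4c}% \end{align} From (\ref{P-RT-4a}), (\ref{P-RT-4b}) and (\ref{P-RT-4c}), we get (\ref{P-RT-4}). 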
\end{proof} Theorem \ref{T-rate} indicates that the convergence rate of the second-step sieve M estimator is determined by the convergence rate $\max\{ \delta _{1,n},\delta_{n}\}$ of the estimation error introduced by the first-step sieve estimation, the rate $\delta_{2,n}$ of the sieve approximation error of $g_{o}$, the convergence rate $\varepsilon_{2,n}$ of the optimization error and the measure $\delta_{g,n}$ of the complexity of the sieve space $\mathcal{G}_{n}$. Let $\Psi_{n,\delta}\equiv\left\{ \psi(Z_{2},g,h_{o})-\psi(Z_{2},g_{o}% ,h_{o}):\text{ }\left\Vert g-g_{o}\right\Vert _{\mathcal{G}}\leq\delta ,g\in\mathcal{N}_{2,K}\right\} $ and let $H_{[]}\left( u,\Psi_{n,\delta },\left\Vert \cdot\right\Vert _{2}\right) $ denote the bracket entropy of the function class $\Psi_{n,\delta}$ with respect to the $L_{2}(dF_{Z})$-norm $\left\Vert \cdot\right\Vert _{2}$. Define \[ J_{[]}\left( \delta,\Psi_{n,\delta},\left\Vert \cdot\right\Vert _{2}\right) =\int_{0}^{\delta}\sqrt{H_{[]}\left( u,\Psi_{n,\delta},\left\Vert \cdot\right\Vert _{2}\right) }du. \] Assumption \ref{R1}.(iii) and (iv) can be replaced by the following low level conditions. 
\begin{assumption} \label{R1'} (i) The data are i.i.d.; (ii) \[ \sup_{\left\{ g\in\mathcal{N}_{2,K}:\text{ }\left\Vert g-g_{o}\right\Vert _{\mathcal{G}}\leq\delta\right\} }\mathbb{E}\left[ \left\vert \psi (Z,g,h_{o})-\psi(Z,g_{o},h_{o})\right\vert ^{2}\right] \leq c\delta^{2}; \] (iii) for any small $\delta>0$, there exists a constant $s_{1}\in(0,2)$ such that \[ \sup_{\left\{ g\in\mathcal{N}_{2,K}:\text{ }\left\Vert g-g_{o}\right\Vert _{\mathcal{G}}\leq\delta\right\} }\left\vert \psi(Z,g,h_{o})-\psi (Z,g_{o},h_{o})\right\vert \leq\delta^{s_{1}}U(Z) \] where $\mathbb{E}\left[ \left\vert U(Z)\right\vert ^{s_{2}}\right] \leq c$ for some $s_{2}\geq2$; (iv) there is a sequence of positive numbers $\delta_{g,n}$ such that% \[ \delta_{g,n}=\inf\left\{ \delta\in(0,1):\frac{J_{[]}\left( \delta ,\Psi_{n,\delta},\left\Vert \cdot\right\Vert _{2}\right) }{\sqrt{n}\delta ^{2}}\leq c\right\} , \] where $\delta^{-\gamma}J_{[]}\left( \delta,\Psi_{n,\delta},\left\Vert \cdot\right\Vert _{2}\right) $ is a decreasing function for some $\gamma \in(0,2)$. \end{assumption} Assumption \ref{R1'}.(i), (ii) and (iii) are directly from the sufficient conditions of Theorem 3.2 in Chen (2007) which establishes the convergence rate of one-step sieve M estimation with \emph{i.i.d.} or m-dependent data. The low level conditions in Assumption \ref{R1'} are easy to verify in practice. However, the advantage of the high level assumption (\ref{AR1-4}) is that it integrates the data structure and the metric entropy restriction into one simple stochastic equicontinuity condition. As a result, the convergence rate of the second-step sieve M estimator derived in this paper applies to the general scenario with time series observation. \begin{corollary} \label{C-rate} Suppose that the conditions in Theorem \ref{CST}, Assumption \ref{R1}.(i), (ii) and \ref{R1'} are satisfied. 
Furthermore, if $\left\Vert g_{n}-g_{o}\right\Vert _{\mathcal{G}}=O(\delta_{2,n}^{\ast})$, then we have $\left\Vert \widehat{g}_{n}-g_{o}\right\Vert _{\mathcal{G}}=O_{p}(\delta _{2,n}^{\ast})$, where $\delta_{2,n}^{\ast}$ is defined in Theorem \ref{T-rate}. \end{corollary} \begin{proof} [Proof of Corollary \ref{C-rate}]By\ Assumption \ref{R1'}.(iii), we know that for any small number $\omega>0$, there exists a sufficiently large constant $M_{n}$ such that% \begin{align*} \Pr(\left\vert U(Z_{i})\right\vert & >M_{n}\text{ for some }i\leq n)\leq \sum_{i=1}^{n}\Pr(\left\vert U(Z_{i})\right\vert >M_{n})\\ & \leq\sum_{i=1}^{n}\frac{\mathbb{E}\left[ \left\vert U(Z)\right\vert ^{s_{2}}\right] }{M_{n}^{s_{2}}}\leq cnM_{n}^{-s_{2}}\leq\omega, \end{align*} where the first inequality is by the Bonferroni inequality, and the second inequality is by the Markov inequality. Now, conditioning on the event $\{ \left\vert U(Z_{i})\right\vert \leq M_{n}$ for all $i\leq n\}$ and using Assumption \ref{R1'}.(iii), we have \[ \left\vert \psi(Z_{i},g,h_{o})-\psi(Z_{i},g_{o},h_{o})\right\vert \leq \delta^{s_{1}}M_{n}% \] for all $i\leq n$ and for any $\psi(Z,g,h_{o})-\psi(Z,g_{o},h_{o})\in \Psi_{n,\delta}$, which together with Assumption \ref{R1'}.(i) and (ii), enables us to invoke Lemma 19.36 in Van der Vaart (1998) to get \begin{align*} & \mathbb{E}\left[ \sup_{\left\{ g\in\mathcal{G}_{n}:\text{ }\left\Vert g-g_{o}\right\Vert _{\mathcal{G}}\leq\delta\right\} }\left\vert \mu _{n}\left[ \psi(Z,g,h_{o})-\psi(Z,g_{o},h_{o})\right] \right\vert \right] \\ & \leq\frac{cJ_{[]}\left( \delta,\Psi_{n,\delta},\left\Vert \cdot\right\Vert _{2}\right) }{\sqrt{n}}\left( 1+\frac{J_{[]}\left( \delta,\Psi_{n,\delta },\left\Vert \cdot\right\Vert _{2}\right) }{\sqrt{n}\delta^{2}}M_{n}\right) \equiv\frac{\phi_{n}(\delta)}{\sqrt{n}}. \end{align*} By Assumption \ref{R1'}.(iv), we know that the above function $\phi_{n}% (\delta)$ satisfies the requirement (\ref{DR1}) in Theorem \ref{T-rate}. 
The rest of the proof is the same as that of Theorem \ref{T-rate} and hence is omitted. \end{proof} \bigskip \begin{thebibliography}{99} % \bibitem {}Belloni, A., V. Chernozhukov, D. Chetverikov, and K. Kato (2015): \textquotedblleft Some New Asymptotic Theory for Least Squares Series: Pointwise and Uniform Results,\textquotedblright\ Journal of Econometrics, 186, 345--366. \bibitem {}Belloni, A.,\ Chernozhukov, V., Chetverikov, D., and Fern\'{a}ndez-Val, I. (2016): \textquotedblleft Conditional Quantile Processes Based on Series or Many Regressors,\textquotedblright\ Working Paper, Department of Economics, UCLA. \bibitem {}Chen, X. (2007): \textquotedblleft Large Sample Sieve Estimation of Semi-Nonparametric Models,\textquotedblright\ In: James J. Heckman and Edward E. Leamer, Editor(s), \emph{Handbook of Econometrics}, 6B, Pages 5549-5632. \bibitem {}Chen, X. and X. Shen (1998): \textquotedblleft Sieve Extremum Estimates for Weakly Dependent Data,\textquotedblright\ \emph{Econometrica}, 66, 289-314. \bibitem {}Chen, X., and D. Pouzo (2012): \textquotedblleft Estimation of Nonparametric Conditional Moment Models with Possibly Nonsmooth Generalized Residuals,\textquotedblright\ \emph{Econometrica} 80, 277-321. \bibitem {}Shen, X. and W.H. Wong (1994): \textquotedblleft Convergence Rate of Sieve Estimates,\textquotedblright\ \emph{Annals of Statistics}, 22(2) 580--615. \bibitem {}Van der Vaart, A. and J. Wellner (1996): \emph{Weak Convergence and Empirical Processes: with Applications to Statistics}, New York: Springer-Verlag. \bibitem {}Van der Vaart, A. (1998): \emph{Asymptotic Statistics}, Cambridge: Cambridge University Press. \bibitem {}White, H. and J. Wooldridge (1991): \textquotedblleft Some Results on Sieve Estimation with Dependent Observations\textquotedblright,\ in Barnett, W.A., J. Powell and G. Tauchen (eds.), \emph{Non-parametric and Semi-parametric Methods in Econometrics and Statistics}, 459-493, Cambridge: Cambridge University Press. \bibitem {}Wooldridge, J.M. 
(2002): \emph{Econometric Analysis of Cross Section and Panel Data}, Cambridge: MIT Press. \end{thebibliography} \end{document}