\documentclass[11pt]{article}%
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage{amsfonts}
\usepackage{geometry}
\usepackage[onehalfspacing]{setspace}
\usepackage{numinsec}
\usepackage{harvard}
\usepackage{hyperref}
\usepackage{graphicx}%
\setcounter{MaxMatrixCols}{30}
%TCIDATA{OutputFilter=latex2.dll}
%TCIDATA{Version=5.50.0.2890}
%TCIDATA{CSTFile=LaTeX article (bright).cst}
%TCIDATA{Created=Saturday, July 19, 2008 23:46:52}
%TCIDATA{LastRevised=Monday, January 15, 2018 20:37:49}
%TCIDATA{}
%TCIDATA{}
%TCIDATA{BibliographyScheme=BibTeX}
%TCIDATA{}
%TCIDATA{Language=American English}
%BeginMSIPreambleData
\providecommand{\U}[1]{\protect\rule{.1in}{.1in}}
%EndMSIPreambleData
\newtheorem{theorem}{Theorem}[section]
\newtheorem{acknowledgement}[theorem]{Acknowledgement}
\newtheorem{algorithm}[theorem]{Algorithm}
\newtheorem{axiom}[theorem]{Axiom}
\newtheorem{case}[theorem]{Case}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{conclusion}[theorem]{Conclusion}
\newtheorem{condition}[theorem]{Condition}
\newtheorem{conjecture}[theorem]{Conjecture}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{criterion}[theorem]{Criterion}
\newtheorem{definition}[theorem]{Definition}
\newtheorem{example}[theorem]{Example}
\newtheorem{exercise}[theorem]{Exercise}
\newtheorem{lemma}{Lemma}[section]
\newtheorem{notation}[theorem]{Notation}
\newtheorem{problem}[theorem]{Problem}
\newtheorem{proposition}[theorem]{Proposition}
\newtheorem{remark}[theorem]{Remark}
\newtheorem{solution}[theorem]{Solution}
\newtheorem{summary}[theorem]{Summary}
\newtheorem{assumption}{Assumption}[section]
\newenvironment{proof}[1][Proof]{\noindent \textbf{#1.} }{\ \rule{0.5em}{0.5em}}
\renewcommand{\baselinestretch}{1.5}
\oddsidemargin -.1in
\evensidemargin -.1in
\marginparwidth 1in
\marginparsep 0pt
\topmargin 0pt
\headheight 0pt
\headsep 0pt
\textheight 8.9in
\textwidth 6.9in
\topskip 0pt
\footskip 1cm
\begin{document}
\title{Supplemental Appendix for \textquotedblleft Nonparametric Two-Step Sieve M
Estimation and Inference\textquotedblright}
\author{Jinyong Hahn\thanks{Department of Economics, UCLA, Los Angeles, CA 90095-1477
USA. Email:\ hahn@econ.ucla.edu}\\UCLA
\and Zhipeng Liao\thanks{Department of Economics, UCLA, Los Angeles, CA 90095-1477
USA. Email:\ zhipeng.liao@econ.ucla.edu}\\UCLA
\and Geert Ridder\thanks{Department of Economics, University of Southern
California, Los Angeles, CA 90089.\ Email:\ ridder@usc.edu.}\\USC}
\date{This version: December 2017}
\maketitle
This supplemental appendix provides some auxiliary materials for
``Nonparametric Two-Step Sieve M Estimation and Inference'' (cited as HLR in this
appendix). Section\ 1 provides sufficient conditions for Assumptions 3.2 and
3.4 in HLR which are the key high-level conditions for asymptotic normality of
the two-step sieve M estimator. Section 2 presents some lemmas which are used
in proving Theorem 5.1 in HLR. Section 3 contains verification of the
high-level assumptions for asymptotic normality in the nonparametric
triangular simultaneous equation model. Section 4 contains some extra
simulation results.\ Section 5 establishes general theory on the consistency
and convergence rate of the nonparametric two-step sieve M estimator.
\section{Sufficient Conditions for Assumptions 3.2 and 3.4 in HLR}
In this section, we provide sufficient conditions for the high-level
assumptions (Assumptions 3.2\ and 3.4) of the asymptotic normality of the
nonparametric two-step sieve M estimator. These sufficient conditions are
verified in the nonparametric triangular simultaneous equation model in
Section 3 of the Appendix.\ We assume that the data $\left\{ Z_{i}\right\}
_{i=1}^{n}$ is i.i.d. in this section.
\begin{assumption}
\label{L-SA-1} (i) For any $z_{2}\in\mathcal{Z}_{2}$, any $\alpha
\in\mathcal{N}_{\alpha}$ and any $v_{g,1},v_{g,2}\in\mathcal{V}_{2}$, the
following directional derivatives exist%
\[
\Delta_{\psi}(z_{2},\alpha)[v_{g,1}]=\left. \frac{\partial\psi(z_{2},g+\tau
v_{g,1},h)}{\partial\tau}\right\vert _{\tau=0}\text{ and }r_{\psi,g}%
(z_{2},\alpha)[v_{g,1},v_{g,2}]=\left. \frac{\partial\Delta_{\psi}%
(z_{2},g+\tau v_{g,2},h)[v_{g,1}]}{\partial\tau}\right\vert _{\tau=0};
\]
(ii) there exists $\Lambda_{1,n}(z_{2})$ with $\mathbb{E}\left[ \Lambda
_{1,n}(Z_{2})\right] \leq C$ such that
\[
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \psi(z_{2},g^{\ast},h)-\psi
(z_{2},\alpha)-\Delta_{\psi}(z_{2},\alpha)[\pm\kappa_{n}u_{g_{n}}^{\ast
}]-\kappa_{n}^{2}r_{\psi,g}(z_{2},\alpha)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast
}]\right\vert \leq\kappa_{n}^{2}\Lambda_{1,n}(z_{2});
\]
(iii) there exists $\Lambda_{2,n}(z_{2})$ with $\mathbb{E}\left[
\Lambda_{2,n}(Z_{2})\right] \leq C$ such that%
\[
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert r_{\psi,g}(z_{2},\alpha)[u_{g_{n}%
}^{\ast},u_{g_{n}}^{\ast}]-r_{\psi,g}(z_{2},\alpha_{o})[u_{g_{n}}^{\ast
},u_{g_{n}}^{\ast}]\right\vert \leq\Lambda_{2,n}(z_{2});
\]
(iv) $\mathbb{E}\left[ \left\vert r_{\psi,g}(Z_{2},\alpha_{o})[u_{g_{n}%
}^{\ast},u_{g_{n}}^{\ast}]\right\vert \right] \leq C$; (v) $\mathbb{E}\left[
r_{\psi,h}(Z_{2},\alpha_{o})[h_{o,n}-h_{o},u_{g_{n}}^{\ast}]\right]
=o(n^{-1/2})$.
\end{assumption}
\begin{assumption}
\label{L-SA-2} (i) For any $z_{2}\in\mathcal{Z}_{2}$, any $\alpha
\in\mathcal{N}_{\alpha}$, any $v_{h}\in\mathcal{V}_{1}$ and any $v_{g}%
\in\mathcal{V}_{2}$, the following directional derivative exists%
\[
\left. \frac{\partial\Delta_{\psi}(z_{2},g,h+\tau v_{h})[v_{g}]}{\partial
\tau}\right\vert _{\tau=0}=r_{\psi,h}(z_{2},\alpha)[v_{h},v_{g}];
\]
(ii) there exists $\Lambda_{3,n}(z_{2},\alpha)$ such that for any $\alpha
\in\mathcal{N}_{n}$,%
\[
\left\vert \Delta_{\psi}(z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}%
(z_{2},g_{o},h)[u_{g_{n}}^{\ast}]-r_{\psi,g}(z_{2},g_{o},h)[g-g_{o},u_{g_{n}%
}^{\ast}]\right\vert \leq\Lambda_{3,n}(z_{2},\alpha);
\]
(iii) there exists $\Lambda_{4,n}(z_{2},\alpha)$ such that for any $\alpha
\in\mathcal{N}_{n}$,%
\[
\left\vert \Delta_{\psi}(z_{2},g_{o},h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}%
(z_{2},g_{o},h_{o})[u_{g_{n}}^{\ast}]-r_{\psi,h}(z_{2},g_{o},h_{o}%
)[h-h_{o},u_{g_{n}}^{\ast}]\right\vert \leq\Lambda_{4,n}(z_{2},\alpha);
\]
(iv) there exists $\Lambda_{5,n}(z_{2},\alpha)$ such that for any $\alpha
\in\mathcal{N}_{n}$,%
\[
\left\vert r_{\psi,g}(z_{2},g_{o},h)[g-g_{o},u_{g_{n}}^{\ast}]-r_{\psi
,g}(z_{2},g_{o},h_{o})[g-g_{o},u_{g_{n}}^{\ast}]\right\vert \leq\Lambda
_{5,n}(z_{2},\alpha);
\]
(v) $\max_{j=3,4,5}\sup_{\alpha\in\mathcal{N}_{n}}n^{-1/2}\sum_{i=1}%
^{n}\Lambda_{j,n}(Z_{2,i},\alpha)=o_{p}(1)$; (vi) $\max_{j=3,4,5}\sup
_{\alpha\in\mathcal{N}_{n}}\mathbb{E}\left[ \Lambda_{j,n}(Z_{2}%
,\alpha)\right] =o(n^{1/2})$.
\end{assumption}
By Assumption \ref{L-SA-1}.(i) and the definition of $\left\Vert
\cdot\right\Vert _{\psi}$, we have
\[
\langle v_{g,1},v_{g,2}\rangle_{\psi}=\mathbb{E}\left[ r_{\psi,g}%
(Z_{2},\alpha_{o})[v_{g,1},v_{g,2}]\right]
\]
for any $v_{g,1},v_{g,2}\in\mathcal{V}_{2}$. By Assumption \ref{L-SA-2}.(i),
we have
\[
\Gamma(\alpha_{o})\left[ v_{h},v_{g}\right] =\mathbb{E}\left[ r_{\psi
,h}(Z_{2},\alpha_{o})[v_{h},v_{g}]\right]
\]
for any $v_{h}\in\mathcal{V}_{1}$ and any $v_{g}\in\mathcal{V}_{2}$.
Suppose that $\mathcal{F}$ is a class of functions of $Z$. Let $F$ denote an
envelope of $\mathcal{F}$,
\[
F(z)\geq\sup_{f\in\mathcal{F}}\left\vert f(z)\right\vert \text{ for any }%
z\in\mathcal{Z}%
\]
where $\mathcal{Z}$ denotes the support of $Z$. For a probability measure $Q$
and a constant $q$, such that $\left\Vert F\right\Vert _{Q,q}>0$ (where
$\left\Vert \cdot\right\Vert _{Q,q}$ denotes the $L_{q}$-norm under $Q$), we
use $N(\varepsilon\left\Vert F\right\Vert _{Q,q},\mathcal{F},\left\Vert
\cdot\right\Vert _{Q,q})$ to denote the minimal number of $\left\Vert
\cdot\right\Vert _{Q,q}$-balls of radius $\varepsilon\left\Vert F\right\Vert
_{Q,q}$ needed to cover $\mathcal{F}$. The supremum of $N(\varepsilon
\left\Vert F\right\Vert _{Q,q},\mathcal{F},\left\Vert \cdot\right\Vert
_{Q,q})$ over all finitely-discrete probability measures $Q$ is a uniform
entropy number of $\mathcal{F}$.
Define
\begin{align*}
\mathcal{F}_{1,n}^{\ast} & =\left\{ z_{2}\mapsto r_{\psi,h}(z_{2}%
,\alpha_{o})[h-h_{o,n},u_{g_{n}}^{\ast}]:h\in\mathcal{N}_{h,n}\right\} ,\\
\mathcal{F}_{2,n}^{\ast} & =\left\{ z_{2}\mapsto r_{\psi,g}(z_{2}%
,\alpha_{o})[g-g_{o,n},u_{g_{n}}^{\ast}]:g\in\mathcal{N}_{g,n}\right\} ,
\end{align*}
where $h_{o,n}\in\mathcal{H}_{n}$ and $g_{o,n}\in\mathcal{G}_{n}$ are such
that $\left\Vert h_{o,n}-h_{o}\right\Vert _{\mathcal{H}}=O(\delta_{1,n}^{\ast
})$ and $\left\Vert g_{o,n}-g_{o}\right\Vert _{\mathcal{G}}=O(\delta
_{2,n}^{\ast})$.
\begin{assumption}
\label{L-SA-3} (i) $\mathbb{E}\left[ \left\vert r_{\psi,h}(Z_{2},\alpha
_{o})[h_{o,n}-h_{o},u_{g_{n}}^{\ast}]\right\vert \right] =o(n^{-1/2})$; (ii)
let $F_{1,n}^{\ast}$ denote an envelope of $\mathcal{F}_{1,n}^{\ast}$, then
\[
\sup_{Q}N(\varepsilon\left\Vert F_{1,n}^{\ast}\right\Vert _{Q,2}%
,\mathcal{F}_{1,n}^{\ast},L_{2}(Q))\leq(C/\varepsilon)^{CL}\ \text{for any
}\varepsilon\in(0,1];
\]
(iii) $\mathbb{E}\left[ \left\vert r_{\psi,g}(Z_{2},\alpha_{o})[g_{o,n}%
-g_{o},u_{g_{n}}^{\ast}]\right\vert \right] =o(n^{-1/2})$; (iv) let
$F_{2,n}^{\ast}$ denote an envelope of $\mathcal{F}_{2,n}^{\ast}$, then
\[
\sup_{Q}N(\varepsilon\left\Vert F_{2,n}^{\ast}\right\Vert _{Q,2}%
,\mathcal{F}_{2,n}^{\ast},L_{2}(Q))\leq(C/\varepsilon)^{CK}\ \text{for any
}\varepsilon\in(0,1];
\]
(v) $\max_{j=1,2}(\sup_{f\in\mathcal{F}_{j,n}^{\ast}}\mathbb{E}\left[
f^{2}\right] +(K+L)\sup_{z_{2}\in\mathcal{Z}_{2}}|F_{j,n}^{2}(z_{2}%
)|\log(n)n^{-1})^{1/2}((K+L)\log(n))^{1/2}=o(1)$.
\end{assumption}
\begin{lemma}
\label{L-SA-L1} Under Assumptions \ref{L-SA-1}-\ref{L-SA-3}, Assumption 3.2 in
HLR holds.
\end{lemma}
\begin{proof}
[Proof of Lemma \ref{L-SA-L1}]By Assumptions \ref{L-SA-1}.(i)-(ii), and the
triangle inequality,%
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert n^{-1}\sum_{i=1}^{n}\left[
\begin{array}
[c]{c}%
\psi(Z_{2,i},g^{\ast},h)-\psi(Z_{2,i},g,h)\\
-\Delta_{\psi}(Z_{2,i},g,h)[\pm\kappa_{n}u_{g_{n}}^{\ast}]-\kappa_{n}%
^{2}r_{\psi,g}(Z_{2,i},g,h)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]
\end{array}
\right] \right\vert \leq C\kappa_{n}^{2}n^{-1}\sum_{i=1}^{n}\Lambda
_{1,n}(Z_{2,i}) \label{PL-SA-L1-1}%
\end{equation}
which together with $\mathbb{E}\left[ \Lambda_{1,n}(Z_{2})\right] \leq C$
and the Markov inequality implies that
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert n^{-1}\sum_{i=1}^{n}\left[
\begin{array}
[c]{c}%
\psi(Z_{2,i},g^{\ast},h)-\psi(Z_{2,i},g,h)\\
-\Delta_{\psi}(Z_{2,i},g,h)[\pm\kappa_{n}u_{g_{n}}^{\ast}]-\kappa_{n}%
^{2}r_{\psi,g}(Z_{2,i},g,h)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]
\end{array}
\right] \right\vert =O_{p}(\kappa_{n}^{2}). \label{PL-SA-L1-2}%
\end{equation}
Similarly, by Assumptions \ref{L-SA-1}.(i)-(ii), and the triangle inequality,
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mathbb{E}\left[
\begin{array}
[c]{c}%
\psi(Z_{2},g^{\ast},h)-\psi(Z_{2},g,h)\\
-\Delta_{\psi}(Z_{2},g,h)[\pm\kappa_{n}u_{g_{n}}^{\ast}]-\kappa_{n}^{2}%
r_{\psi,g}(Z_{2},g,h)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]
\end{array}
\right] \right\vert =O(\kappa_{n}^{2}), \label{PL-SA-L1-3}%
\end{equation}
which together with (\ref{PL-SA-L1-2}) implies that
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{
\begin{array}
[c]{c}%
\psi(Z_{2},g^{\ast},h)-\psi(Z_{2},g,h)\\
-\Delta_{\psi}(Z_{2},g,h)[\pm\kappa_{n}u_{g_{n}}^{\ast}]-\kappa_{n}^{2}%
r_{\psi,g}(Z_{2},g,h)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]
\end{array}
\right\} \right\vert =O_{p}(\kappa_{n}^{2}). \label{PL-SA-L1-4}%
\end{equation}
By Assumption \ref{L-SA-1}.(iii), the triangle inequality and the Markov
inequality,\
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ r_{\psi,g}%
(Z_{2},\alpha)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]-r_{\psi,g}(Z_{2},\alpha
_{o})[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]\right\} \right\vert =O_{p}(1)
\label{PL-SA-L1-4a}%
\end{equation}
which together with Assumption \ref{L-SA-1}.(iv), the triangle inequality and
the Markov inequality implies that
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ r_{\psi,g}%
(Z_{2},\alpha)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]\right\} \right\vert
=O_{p}(1). \label{PL-SA-L1-5}%
\end{equation}
Combining the results in (\ref{PL-SA-L1-4}) and (\ref{PL-SA-L1-5}), and then
applying the triangle inequality, we prove condition (12) of Assumption
3.2.(i) in HLR.
By Assumptions \ref{L-SA-2}.(ii), \ref{L-SA-2}.(v)-(vi), the triangle
inequality and the Markov inequality,%
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \Delta_{\psi}%
(Z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},g_{o},h)[u_{g_{n}}^{\ast
}]-r_{\psi,g}(Z_{2},g_{o},h)[g-g_{o},u_{g_{n}}^{\ast}]\right\} \right\vert
=o_{p}(n^{-1/2}). \label{PL-SA-L1-6a}%
\end{equation}
Similarly, by Assumptions \ref{L-SA-2}.(iv)-(vi), the triangle inequality and
the Markov inequality,%
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ r_{\psi,g}%
(Z_{2},g_{o},h)[g-g_{o},u_{g_{n}}^{\ast}]-r_{\psi,g}(Z_{2},g_{o}%
,h_{o})[g-g_{o},u_{g_{n}}^{\ast}]\right\} \right\vert =o_{p}(n^{-1/2}).
\label{PL-SA-L1-6b}%
\end{equation}
By Assumption \ref{L-SA-3}.(iii), the triangle inequality and the Markov
inequality,%
\begin{equation}
\left\vert \mu_{n}\left\{ r_{\psi,g}(Z_{2},\alpha_{o})[g_{o,n}-g_{o}%
,u_{g_{n}}^{\ast}]\right\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L1-6c}%
\end{equation}
By Assumptions \ref{L-SA-3}.(iv)-(v), we can use Lemma 22 in Belloni et
al.\ (2016) to show that
\begin{equation}
\sup_{g\in\mathcal{N}_{g,n}}\left\vert \mu_{n}\left\{ r_{\psi,g}(Z_{2}%
,\alpha_{o})[g-g_{o,n},u_{g_{n}}^{\ast}]\right\} \right\vert =o_{p}%
(n^{-1/2}), \label{PL-SA-L1-6d}%
\end{equation}
which together with (\ref{PL-SA-L1-6c}) implies that
\begin{equation}
\sup_{g\in\mathcal{N}_{g,n}}\left\vert \mu_{n}\left\{ r_{\psi,g}(Z_{2}%
,\alpha_{o})[g-g_{o},u_{g_{n}}^{\ast}]\right\} \right\vert =o_{p}(n^{-1/2}).
\label{PL-SA-L1-6e}%
\end{equation}
Collecting the results in (\ref{PL-SA-L1-6a}), (\ref{PL-SA-L1-6b}) and
(\ref{PL-SA-L1-6e}), we get%
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \Delta_{\psi}%
(Z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},g_{o},h)[u_{g_{n}}^{\ast
}]\right\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L1-6f}%
\end{equation}
By Assumptions \ref{L-SA-2}.(iii), \ref{L-SA-2}.(v)-(vi), the triangle
inequality and the Markov inequality,%
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \Delta_{\psi}%
(Z_{2},g_{o},h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},\alpha_{o})[u_{g_{n}%
}^{\ast}]-r_{\psi,h}(Z_{2},\alpha_{o})[h-h_{o},u_{g_{n}}^{\ast}]\right\}
\right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L1-6g}%
\end{equation}
By Assumption \ref{L-SA-3}.(i), the triangle inequality and the Markov
inequality,%
\begin{equation}
\left\vert \mu_{n}\left\{ r_{\psi,h}(Z_{2},\alpha_{o})[h_{o,n}-h_{o}%
,u_{g_{n}}^{\ast}]\right\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L1-6h}%
\end{equation}
By Assumptions \ref{L-SA-3}.(ii) and \ref{L-SA-3}.(v), we can use Lemma 22 in
Belloni et al.\ (2016) to show that%
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ r_{\psi,h}(Z_{2}%
,\alpha_{o})[h-h_{o,n},u_{g_{n}}^{\ast}]\right\} \right\vert =o_{p}%
(n^{-1/2}), \label{PL-SA-L1-6i}%
\end{equation}
which together with (\ref{PL-SA-L1-6h}) implies that
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ r_{\psi,h}(Z_{2}%
,\alpha_{o})[h-h_{o},u_{g_{n}}^{\ast}]\right\} \right\vert =o_{p}(n^{-1/2}).
\label{PL-SA-L1-6j}%
\end{equation}
Collecting the results in (\ref{PL-SA-L1-6g}) and (\ref{PL-SA-L1-6j}), we get%
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \Delta_{\psi}%
(Z_{2},g_{o},h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},\alpha_{o})[u_{g_{n}%
}^{\ast}]\right\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L1-6k}%
\end{equation}
Combining the results in (\ref{PL-SA-L1-6f}) and (\ref{PL-SA-L1-6k}), and then
applying the triangle inequality, we immediately prove condition (13) of
Assumption 3.2.(i) in HLR.
By Assumptions \ref{L-SA-1}.(ii)-(iv),
\begin{equation}
\mathbb{E}\left[ \psi(Z_{2},g^{\ast},h)-\psi(Z_{2},g,h)\right] =\pm
\kappa_{n}\mathbb{E}\left[ \Delta_{\psi}(Z_{2},g,h)[u_{g_{n}}^{\ast}]\right]
+O(\kappa_{n}^{2}), \label{PL-SA-L1-10}%
\end{equation}
uniformly over $\alpha\in\mathcal{N}_{n}$. As $\mathbb{E}\left[ \Delta_{\psi
}(Z_{2},g_{o},h_{o})[u_{g_{n}}^{\ast}]\right] =0$, by Assumptions
\ref{L-SA-2}.(ii)-(iv) and \ref{L-SA-2}.(vi)
\begin{align}
& \mathbb{E}\left[ \Delta_{\psi}(Z_{2},g,h)[u_{g_{n}}^{\ast}]\right]
\nonumber\\
& =\mathbb{E}\left[ \Delta_{\psi}(Z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi
}(Z_{2},g_{o},h)[u_{g_{n}}^{\ast}]-r_{\psi,g}(Z_{2},g_{o},h)[g-g_{o},u_{g_{n}%
}^{\ast}]\right] \nonumber\\
& +\mathbb{E}\left[ \Delta_{\psi}(Z_{2},g_{o},h)[u_{g_{n}}^{\ast}%
]-\Delta_{\psi}(Z_{2},g_{o},h_{o})[u_{g_{n}}^{\ast}]-r_{\psi,h}(Z_{2}%
,g_{o},h_{o})[h-h_{o},u_{g_{n}}^{\ast}]\right] \nonumber\\
& +\mathbb{E}\left[ r_{\psi,g}(Z_{2},g_{o},h)[g-g_{o},u_{g_{n}}^{\ast
}]-r_{\psi,g}(Z_{2},g_{o},h_{o})[g-g_{o},u_{g_{n}}^{\ast}]\right] \nonumber\\
& +\mathbb{E}\left[ r_{\psi,g}(Z_{2},g_{o},h_{o})[g-g_{o},u_{g_{n}}^{\ast
}]\right] +\mathbb{E}\left[ r_{\psi,h}(Z_{2},g_{o},h_{o})[h-h_{o},u_{g_{n}%
}^{\ast}]\right] \nonumber\\
& =\mathbb{E}\left[ r_{\psi,g}(Z_{2},\alpha_{o})[g-g_{o},u_{g_{n}}^{\ast
}]\right] +\mathbb{E}\left[ r_{\psi,h}(Z_{2},\alpha_{o})[h-h_{o},u_{g_{n}%
}^{\ast}]\right] +o(n^{-1/2})\nonumber\\
& =\langle g-g_{o},u_{g_{n}}^{\ast}\rangle_{\psi}+\Gamma(\alpha_{o})\left[
h-h_{o},u_{g_{n}}^{\ast}\right] +o(n^{-1/2}) \label{PL-SA-L1-11}%
\end{align}
where the last equality is by the definition of the inner product
$\langle\cdot,\cdot\rangle_{\psi}$ and the functional $\Gamma(\alpha
_{o})\left[ \cdot,\cdot\right] $.\ By Assumption \ref{L-SA-1}.(v),
(\ref{PL-SA-L1-10}), (\ref{PL-SA-L1-11}) and the definition of $K_{\psi}%
(g,h)$, we have%
\begin{equation}
K_{\psi}(g,h)-K_{\psi}(g^{\ast},h)=\mp\kappa_{n}\left[ \langle g-g_{o}%
,u_{g_{n}}^{\ast}\rangle_{\psi}+\Gamma(\alpha_{o})\left[ h-h_{o,n},u_{g_{n}%
}^{\ast}\right] \right] +O(\kappa_{n}^{2}). \label{PL-SA-L1-12}%
\end{equation}
By the definition of $||\cdot||_{\psi}$ and Assumption \ref{L-SA-1}.(iv),
\begin{equation}
\frac{||g^{\ast}-g_{o}||_{\psi}^{2}-||g-g_{o}||_{\psi}^{2}}{2}=\langle
g-g_{o},\pm\kappa_{n}u_{g_{n}}^{\ast}\rangle_{\psi}+O(\kappa_{n}^{2}).
\label{PL-SA-L1-13}%
\end{equation}
Collecting the results in (\ref{PL-SA-L1-12}) and (\ref{PL-SA-L1-13}), we
immediately prove Assumption 3.2.(ii) in HLR.
\end{proof}
We next provide sufficient conditions for Assumptions 3.2 and 3.4 in HLR when
the criterion function in the second-step M estimation takes the following
form%
\begin{equation}
\psi(Z_{2},g,h)=\tau(Z_{1},h)\psi^{\ast}(Z_{2},g,h). \label{L-SA-D1}%
\end{equation}
We will assume that Assumptions \ref{L-SA-1}.(i) and \ref{L-SA-2}.(i) hold for
$\psi^{\ast}(Z_{2},g,h)$. Define%
\[
\Delta_{\psi}^{\ast}(z_{2},\alpha)[v_{g,1}]=\left. \frac{\partial\psi^{\ast
}(z_{2},g+\tau v_{g,1},h)}{\partial\tau}\right\vert _{\tau=0}\text{ and
}r_{\psi,g}^{\ast}(z_{2},\alpha)[v_{g,1},v_{g,2}]=\left. \frac{\partial
\Delta_{\psi}^{\ast}(z_{2},g+\tau v_{g,2},h)[v_{g,1}]}{\partial\tau
}\right\vert _{\tau=0},
\]
for any $z_{2}\in\mathcal{Z}_{2}$, any $\alpha\in\mathcal{N}_{\alpha}$ and any
$v_{g,1},v_{g,2}\in\mathcal{V}_{2}$. Then we have
\[
\Delta_{\psi}(z_{2},\alpha)[v_{g,1}]=\tau(z_{1},h)\Delta_{\psi}^{\ast}%
(z_{2},\alpha)[v_{g,1}]\text{ and }r_{\psi,g}(z_{2},\alpha)[v_{g,1}%
,v_{g,2}]=\tau(z_{1},h)r_{\psi,g}^{\ast}(z_{2},\alpha)[v_{g,1},v_{g,2}]
\]
for any $\alpha\in\mathcal{N}_{\alpha}$ and any $v_{g,1},v_{g,2}\in
\mathcal{V}_{2}$. Define%
\[
r_{\psi,h}(z_{2},\alpha)[v_{h},v_{g}]=\tau(z_{1},h)r_{\psi,h}^{\ast}%
(z_{2},\alpha)[v_{h},v_{g}],
\]
where
\[
r_{\psi,h}^{\ast}(z_{2},\alpha)[v_{h},v_{g}]=\left. \frac{\partial
\Delta_{\psi}^{\ast}(z_{2},g,h+\tau v_{h})[v_{g}]}{\partial\tau}\right\vert
_{\tau=0}.
\]
Let $\xi_{n}$ denote a non-decreasing real positive sequence, and
$\delta_{\tau,n}^{\ast}$ denote a real positive sequence.
\begin{assumption}
\label{L-SA-4} (i) $\sup_{z_{1}\in\mathcal{Z}_{1},h\in\mathcal{N}_{h,n}}\left[
\left\vert \tau(z_{1},h)\right\vert +\left\vert \tau(z_{1},h_{o})\right\vert
\right] \leq C$; (ii) Assumptions \ref{L-SA-1}.(i)-(ii) and \ref{L-SA-1}.(v)
hold; (iii) equation (19) in HLR holds; (iv) $\Delta_{\psi}^{\ast}%
(z_{2},\alpha)[v_{g}]$ satisfies Assumption \ref{L-SA-2}%
.(i);\ (v)\ Assumptions \ref{L-SA-2}.(ii) and \ref{L-SA-2}.(v)-(vi)
hold;\ (vi)\ $\sup_{z_{1}\in\mathcal{Z}_{1}}\mathbb{E}\left[ \left.
(\Delta_{\psi}^{\ast}(Z_{2},\alpha_{o})[u_{g_{n}}^{\ast}])^{2}\right\vert
Z_{1}=z_{1}\right] \leq\xi_{n}^{2}$; (vii)
\[
\sup_{h\in\mathcal{N}_{h,n}}n^{-1}\sum_{i=1}^{n}(\tau(Z_{1,i},h)-\tau
(Z_{1,i},h_{o}))^{2}=O_{p}(\delta_{\tau,n}^{\ast})
\]
where $\delta_{\tau,n}^{\ast}\xi_{n}^{2}=o(1)$.
\end{assumption}
\begin{assumption}
\label{L-SA-5} (i) there exists $\Lambda_{6,n}(z_{2},\alpha)$ such that for
any $\alpha\in\mathcal{N}_{n}$
\[
\left\vert \tau(z_{1},h)\left( r_{\psi,g}^{\ast}(z_{2},g_{o},h)[g-g_{o}%
,u_{g_{n}}^{\ast}]-r_{\psi,g}^{\ast}(z_{2},\alpha_{o})[g-g_{o},u_{g_{n}}%
^{\ast}]\right) \right\vert \leq\Lambda_{6,n}(z_{2},\alpha);
\]
(ii) there exists $\Lambda_{7,n}(z_{2},\alpha)$ such that for any $\alpha
\in\mathcal{N}_{n}$%
\[
\left\vert \tau(z_{1},h)\left( \Delta_{\psi}^{\ast}(z_{2},g_{o},h)[u_{g_{n}%
}^{\ast}]-\Delta_{\psi}^{\ast}(z_{2},\alpha_{o})[u_{g_{n}}^{\ast}]-r_{\psi
,h}^{\ast}(z_{2},\alpha_{o})[h-h_{o},u_{g_{n}}^{\ast}]\right) \right\vert
\leq\Lambda_{7,n}(z_{2},\alpha);
\]
(iii)
\[
\sup_{h\in\mathcal{N}_{h,n}}\left\vert \mathbb{E}\left[ (\tau(Z_{1}%
,h)-\tau(Z_{1},h_{o}))r_{\psi,h}^{\ast}(Z_{2},\alpha_{o})[h-h_{o},u_{g_{n}%
}^{\ast}]\right] \right\vert =o(n^{-1/2});
\]
(iv)%
\[
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mathbb{E}\left[ (\tau
(Z_{1},h)-\tau(Z_{1},h_{o}))r_{\psi,g}^{\ast}(Z_{2},\alpha_{o})[g-g_{o}%
,u_{g_{n}}^{\ast}]\right] \right\vert =o(n^{-1/2});
\]
(v) there exists $\Lambda_{8,n}(z_{2})$ with $\mathbb{E}\left[ \Lambda
_{8,n}(Z_{2})\right] \leq C$ such that%
\[
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \tau(z_{1},h)(r_{\psi,g}^{\ast
}(z_{2},\alpha)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]-r_{\psi,g}^{\ast}%
(z_{2},\alpha_{o})[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}])\right\vert \leq
\Lambda_{8,n}(z_{2});
\]
(vi) $\mathbb{E}\left[ \left\vert r_{\psi,g}^{\ast}(Z_{2},\alpha
_{o})[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]\right\vert \right] \leq C$; (vii)
$\max_{j=6,7}\sup_{\alpha\in\mathcal{N}_{n}}n^{-1}\sum_{i=1}^{n}\Lambda
_{j,n}(Z_{2,i},\alpha)=o_{p}(n^{-1/2})$; (viii) $\max_{j=6,7}\sup_{\alpha
\in\mathcal{N}_{n}}\mathbb{E}\left[ \Lambda_{j,n}(Z_{2},\alpha)\right]
=o(n^{-1/2})$.
\end{assumption}
Define
\begin{align*}
\mathcal{F}_{3,n}^{\ast} & =\left\{ z_{2}\mapsto\tau(z_{1},h)r_{\psi
,h}^{\ast}(z_{2},\alpha_{o})[h-h_{o,n},u_{g_{n}}^{\ast}]:h\in\mathcal{N}%
_{h,n}\right\} ,\\
\mathcal{F}_{4,n}^{\ast} & =\left\{ z_{2}\mapsto\tau(z_{1},h)r_{\psi
,g}^{\ast}(z_{2},\alpha_{o})[g-g_{o,n},u_{g_{n}}^{\ast}]:h\in\mathcal{N}%
_{h,n},g\in\mathcal{N}_{g,n}\right\} .
\end{align*}
\begin{assumption}
\label{L-SA-6} (i) $\mathbb{E}[|r_{\psi,h}^{\ast}(Z_{2},\alpha_{o}%
)[h_{o,n}-h_{o},u_{g_{n}}^{\ast}]|]=o(n^{-1/2})$; (ii) let $F_{3,n}^{\ast}$
denote an envelope of $\mathcal{F}_{3,n}^{\ast}$, then
\[
\sup_{Q}N(\varepsilon\left\Vert F_{3,n}^{\ast}\right\Vert _{Q,2}%
,\mathcal{F}_{3,n}^{\ast},L_{2}(Q))\leq(C/\varepsilon)^{CL}\ \text{for any
}\varepsilon\in(0,1];
\]
(iii) $\mathbb{E}\left[ \left\vert r_{\psi,g}^{\ast}(Z_{2},\alpha
_{o})[g_{o,n}-g_{o},u_{g_{n}}^{\ast}]\right\vert \right] =o(n^{-1/2})$; (iv)
let $F_{4,n}^{\ast}$ denote an envelope of $\mathcal{F}_{4,n}^{\ast}$, then
\[
\sup_{Q}N(\varepsilon\left\Vert F_{4,n}^{\ast}\right\Vert _{Q,2}%
,\mathcal{F}_{4,n}^{\ast},L_{2}(Q))\leq(C/\varepsilon)^{C(L+K)}\ \text{for any
}\varepsilon\in(0,1];
\]
(v) $\max_{j=3,4}(\sup_{f\in\mathcal{F}_{j,n}^{\ast}}\mathbb{E}\left[
f^{2}\right] +(K+L)\sup_{z_{2}\in\mathcal{Z}_{2}}|F_{j,n}^{\ast}(z_{2}%
)|\log(n)n^{-1})^{1/2}((K+L)\log(n))^{1/2}=o(1)$.
\end{assumption}
By definition, we have $\langle v_{g,1},v_{g,2}\rangle_{\psi}=\mathbb{E}%
\left[ \tau(Z_{1},h_{o})r_{\psi,g}^{\ast}(Z_{2},\alpha_{o})[v_{g,1}%
,v_{g,2}]\right] $ for any $v_{g,1},v_{g,2}\in\mathcal{V}_{2}$. Moreover, by
(19) in HLR,
\[
\Gamma(\alpha_{o})\left[ v_{h},v_{g}\right] =\mathbb{E}\left[ \tau
(Z_{1},h_{o})r_{\psi,h}^{\ast}(Z_{2},\alpha_{o})[v_{h},v_{g}]\right]
\]
for any $v_{h}\in\mathcal{V}_{1}$ and any $v_{g}\in\mathcal{V}_{2}$.
\begin{lemma}
\label{L-SA-L2} Under Assumptions \ref{L-SA-4}-\ref{L-SA-6}, condition (13) of
Assumption 3.2, Assumption 3.2.(ii) and Assumption 3.4 in HLR hold.
\end{lemma}
\begin{proof}
[Proof of Lemma \ref{L-SA-L2}]By Assumptions \ref{L-SA-1}.(i)-(ii), we can use
the same arguments in the proof of Lemma \ref{L-SA-L1} to show that
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{
\begin{array}
[c]{c}%
\psi(Z_{2},g^{\ast},h)-\psi(Z_{2},g,h)\\
-\Delta_{\psi}(Z_{2},g,h)[\pm\kappa_{n}u_{g_{n}}^{\ast}]-\kappa_{n}^{2}%
r_{\psi,g}(Z_{2},g,h)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]
\end{array}
\right\} \right\vert =O_{p}(\kappa_{n}^{2}). \label{PL-SA-L2-0a}%
\end{equation}
By Assumptions \ref{L-SA-5}.(v), \ref{L-SA-5}.(vii)-(viii), the triangle
inequality and the Markov inequality,\
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \tau(Z_{1}%
,h)(r_{\psi,g}^{\ast}(Z_{2},\alpha)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}%
]-r_{\psi,g}^{\ast}(Z_{2},\alpha_{o})[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast
}])\right\} \right\vert =O_{p}(1). \label{PL-SA-L2-0b}%
\end{equation}
Assumptions \ref{L-SA-4}.(i) and \ref{L-SA-5}.(vi), the triangle inequality
and the Markov inequality, together with (\ref{PL-SA-L2-0b}), imply that
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \tau(Z_{1}%
,h)r_{\psi,g}^{\ast}(Z_{2},\alpha_{o})[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast
}]\right\} \right\vert =O_{p}(1). \label{PL-SA-L2-0c}%
\end{equation}
Combining the results in (\ref{PL-SA-L2-0a})-(\ref{PL-SA-L2-0c}), and then
applying the triangle inequality, we prove condition (13) of Assumption
3.2.(i) in HLR.
By Assumption \ref{L-SA-5}.(v),
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mathbb{E}\left[ \tau
(Z_{1},h)(r_{\psi,g}^{\ast}(Z_{2},\alpha)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast
}]-r_{\psi,g}^{\ast}(Z_{2},\alpha_{o})[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast
}])\right] \right\vert =O(1). \label{PL-SA-L2-0d}%
\end{equation}
By Assumptions \ref{L-SA-4}.(i) and \ref{L-SA-5}.(vi),
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n}}\left\vert \mathbb{E}\left[ (\tau(Z_{1}%
,h)-\tau(Z_{1},h_{o}))r_{\psi,g}^{\ast}(Z_{2},\alpha_{o})[u_{g_{n}}^{\ast
},u_{g_{n}}^{\ast}]\right] \right\vert =O(1) \label{PL-SA-L2-0e}%
\end{equation}
and
\begin{equation}
\left\vert \mathbb{E}\left[ \tau(Z_{1},h_{o})r_{\psi,g}^{\ast}(Z_{2}%
,\alpha_{o})[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]\right] \right\vert =O(1),
\label{PL-SA-L2-0f}%
\end{equation}
which together with (\ref{PL-SA-L2-0d}) and the triangle inequality implies
that
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mathbb{E}\left[ r_{\psi,g}%
(Z_{2},\alpha)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]\right] \right\vert =O(1).
\label{PL-SA-L2-0g}%
\end{equation}
By Assumption \ref{L-SA-1}.(ii), (\ref{PL-SA-L2-0g}) and the triangle
inequality,
\begin{equation}
\mathbb{E}\left[ \tau(Z_{1},h)\left[ \psi^{\ast}(Z_{2},g^{\ast}%
,h)-\psi^{\ast}(Z_{2},g,h)\right] \right] =\pm\kappa_{n}\mathbb{E}\left[
\tau(Z_{1},h)\Delta_{\psi}^{\ast}(Z_{2},g,h)[u_{g_{n}}^{\ast}]\right]
+O(\kappa_{n}^{2}), \label{PL-SA-L2-1}%
\end{equation}
uniformly over $\alpha\in\mathcal{N}_{n}$.\ By $\mathbb{E}[\tau(Z_{1}%
,h)\Delta_{\psi}^{\ast}(Z_{2},\alpha_{o})[u_{g_{n}}^{\ast}]]=0$,\ Assumptions
\ref{L-SA-4}.(v), \ref{L-SA-5}.(i)-(iv) and \ref{L-SA-5}.(viii),
\begin{align}
& \mathbb{E}\left[ \tau(Z_{1},h)\Delta_{\psi}^{\ast}(Z_{2},g,h)[u_{g_{n}%
}^{\ast}]\right] \nonumber\\
& =\mathbb{E}\left[ \tau(Z_{1},h)\Delta_{\psi}^{\ast}(Z_{2},g_{o}%
,h)[u_{g_{n}}^{\ast}]\right] \nonumber\\
& \text{ \ \ \ }+\mathbb{E}\left[ \tau(Z_{1},h)r_{\psi,g}^{\ast}(Z_{2}%
,g_{o},h)[g-g_{o},u_{g_{n}}^{\ast}]\right] +o(n^{-1/2})\nonumber\\
& =\mathbb{E}\left[ \tau(Z_{1},h)r_{\psi,h}^{\ast}(Z_{2},\alpha_{o}%
)[h-h_{o},u_{g_{n}}^{\ast}]\right] \nonumber\\
& \text{ \ \ \ }+\mathbb{E}\left[ \tau(Z_{1},h)r_{\psi,g}^{\ast}%
(Z_{2},\alpha_{o})[g-g_{o},u_{g_{n}}^{\ast}]\right] +o(n^{-1/2})\nonumber\\
& =\mathbb{E}\left[ r_{\psi,h}(Z_{2},\alpha_{o})[h-h_{o},u_{g_{n}}^{\ast
}]\right] +\mathbb{E}\left[ r_{\psi,g}(Z_{2},\alpha_{o})[g-g_{o},u_{g_{n}%
}^{\ast}]\right] +o(n^{-1/2})\nonumber\\
& =\Gamma(\alpha_{o})\left[ h-h_{o},u_{g_{n}}^{\ast}\right] +\langle
g-g_{o},u_{g_{n}}^{\ast}\rangle_{\psi}+o(n^{-1/2}), \label{PL-SA-L2-2}%
\end{align}
where the last equality is by the definitions of the inner product
$\langle\cdot,\cdot\rangle_{\psi}$ and the functional $\Gamma(\alpha
_{o})\left[ \cdot,\cdot\right] $. By Assumption \ref{L-SA-1}.(v),
(\ref{PL-SA-L2-1}), (\ref{PL-SA-L2-2}) and the definition of $K_{\psi}(g,h)$,
we have%
\begin{equation}
K_{\psi}(g,h)-K_{\psi}(g^{\ast},h)=\mp\kappa_{n}\left[ \langle g-g_{o}%
,u_{g_{n}}^{\ast}\rangle_{\psi}+\Gamma(\alpha_{o})\left[ h-h_{o,n},u_{g_{n}%
}^{\ast}\right] \right] +O(\kappa_{n}^{2}). \label{PL-SA-L2-3}%
\end{equation}
By the definition of $||\cdot||_{\psi}$, Assumptions \ref{L-SA-4}.(i) and
\ref{L-SA-5}.(vi),
\begin{equation}
\frac{||g^{\ast}-g_{o}||_{\psi}^{2}-||g-g_{o}||_{\psi}^{2}}{2}=\langle
g-g_{o},\pm\kappa_{n}u_{g_{n}}^{\ast}\rangle_{\psi}+O(\kappa_{n}^{2}).
\label{PL-SA-L2-4}%
\end{equation}
Collecting the results in (\ref{PL-SA-L2-3}) and (\ref{PL-SA-L2-4}), we
immediately prove Assumption 3.2.(ii) in HLR.
We next verify Assumption 3.4 in HLR.\ Assumptions 3.4.(i)-(ii) are assumed
directly. By definition,%
\begin{align}
& \Delta_{\psi}(z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(z_{2}%
,g_{o},h)[u_{g_{n}}^{\ast}]\nonumber\\
& =\tau(z_{1},h)r_{\psi,g}^{\ast}(z_{2},\alpha_{o})[g-g_{o,n},u_{g_{n}}%
^{\ast}]\nonumber\\
& +\tau(z_{1},h)r_{\psi,g}^{\ast}(z_{2},\alpha_{o})[g_{o,n}-g_{o},u_{g_{n}%
}^{\ast}]\nonumber\\
& +\left[ \Delta_{\psi}(z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}%
(z_{2},g_{o},h)[u_{g_{n}}^{\ast}]-r_{\psi,g}(z_{2},g_{o},h)[g-g_{o},u_{g_{n}%
}^{\ast}]\right] \nonumber\\
& +\tau(z_{1},h)\left( r_{\psi,g}^{\ast}(z_{2},g_{o},h)[g-g_{o},u_{g_{n}%
}^{\ast}]-r_{\psi,g}^{\ast}(z_{2},\alpha_{o})[g-g_{o},u_{g_{n}}^{\ast
}]\right) . \label{PL-SA-L2-5}%
\end{align}
By Assumptions \ref{L-SA-4}.(v), \ref{L-SA-2}.(v)-(vi), \ref{L-SA-5}.(i) and
\ref{L-SA-5}.(vii)-(viii), and the Markov inequality,%
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{
\begin{array}
[c]{c}%
\Delta_{\psi}(Z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},g_{o}%
,h)[u_{g_{n}}^{\ast}]\\
-r_{\psi,g}(Z_{2},g_{o},h)[g-g_{o},u_{g_{n}}^{\ast}]
\end{array}
\right\} \right\vert =o_{p}(n^{-1/2}), \label{PL-SA-L2-6}%
\end{equation}
and%
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \tau(Z_{1},h)\left(
%
\begin{array}
[c]{c}%
r_{\psi,g}^{\ast}(Z_{2},g_{o},h)[g-g_{o},u_{g_{n}}^{\ast}]\\
-r_{\psi,g}^{\ast}(Z_{2},g_{o},h_{o})[g-g_{o},u_{g_{n}}^{\ast}]
\end{array}
\right) \right\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L2-7}%
\end{equation}
By Assumptions \ref{L-SA-4}.(i), \ref{L-SA-6}.(iii) and the Markov inequality,%
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\{ \tau(Z_{1},h)r_{\psi
,g}^{\ast}(Z_{2},\alpha_{o})[g_{o,n}-g_{o},u_{g_{n}}^{\ast}]\} \right\vert
=o_{p}(n^{-1/2}). \label{PL-SA-L2-8}%
\end{equation}
By Assumptions \ref{L-SA-6}.(iv)-(v), we can use Lemma 22 in Belloni et
al.\ (2016) to show that%
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \tau(Z_{1},h)r_{\psi
,g}^{\ast}(Z_{2},\alpha_{o})[g-g_{o,n},u_{g_{n}}^{\ast}]\right\} \right\vert
=o_{p}(n^{-1/2}). \label{PL-SA-L2-9}%
\end{equation}
Collecting the results in (\ref{PL-SA-L2-5})-(\ref{PL-SA-L2-9}), and then
applying the triangle inequality, we get%
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \Delta_{\psi}%
(Z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},g_{o},h)[u_{g_{n}}^{\ast
}]\right\} \right\vert =o_{p}(n^{-1/2}), \label{PL-SA-L2-10}%
\end{equation}
which proves condition (20) in Assumption 3.4.(iii). By definition,
\begin{align}
& \tau(z_{1},h)(\Delta_{\psi}^{\ast}(z_{2},g_{o},h)[u_{g_{n}}^{\ast}%
]-\Delta_{\psi}^{\ast}(z_{2},g_{o},h_{o})[u_{g_{n}}^{\ast}])\nonumber\\
& =\tau(z_{1},h)r_{\psi,h}^{\ast}(z_{2},\alpha_{o})[h-h_{o,n},u_{g_{n}}%
^{\ast}]\nonumber\\
& +\tau(z_{1},h)r_{\psi,h}^{\ast}(z_{2},\alpha_{o})[h_{o,n}-h_{o},u_{g_{n}%
}^{\ast}]\nonumber\\
& +\tau(z_{1},h)(\Delta_{\psi}^{\ast}(z_{2},g_{o},h)[u_{g_{n}}^{\ast}%
]-\Delta_{\psi}^{\ast}(z_{2},\alpha_{o})[u_{g_{n}}^{\ast}]-r_{\psi,h}^{\ast
}(z_{2},\alpha_{o})[h-h_{o},u_{g_{n}}^{\ast}]). \label{PL-SA-L2-11}%
\end{align}
By Assumptions \ref{L-SA-5}.(ii), \ref{L-SA-5}.(vii)-(viii), the Markov
inequality and the triangle inequality,%
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{
\begin{array}
[c]{c}%
\tau(Z_{1},h)(\Delta_{\psi}^{\ast}(Z_{2},g_{o},h)[u_{g_{n}}^{\ast}%
]-\Delta_{\psi}^{\ast}(Z_{2},\alpha_{o})[u_{g_{n}}^{\ast}]\\
-r_{\psi,h}^{\ast}(Z_{2},\alpha_{o})[h-h_{o},u_{g_{n}}^{\ast}])
\end{array}
\right\} \right\vert =o_{p}(n^{-1/2}). \label{PL-SA-L2-12}%
\end{equation}
By Assumptions \ref{L-SA-4}.(i), \ref{L-SA-6}.(i), the Markov inequality and
the triangle inequality,%
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\{ \tau(Z_{1},h)r_{\psi
,h}^{\ast}(Z_{2},\alpha_{o})[h_{o,n}-h_{o},u_{g_{n}}^{\ast}]\} \right\vert
=o_{p}(n^{-1/2}). \label{PL-SA-L2-13}%
\end{equation}
By Assumptions \ref{L-SA-6}.(ii)-(iii), we can use Lemma 22 in Belloni
et al.\ (2016) to show that%
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\{ \tau(Z_{1},h)r_{\psi,h}%
^{\ast}(Z_{2},\alpha_{o})[h-h_{o,n},u_{g_{n}}^{\ast}]\} \right\vert
=o_{p}(n^{-1/2}). \label{PL-SA-L2-14}%
\end{equation}
Collecting the results in (\ref{PL-SA-L2-11})-(\ref{PL-SA-L2-14}), and then
applying the triangle inequality, we get%
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ \tau(Z_{1}%
,h)(\Delta_{\psi}^{\ast}(Z_{2},g_{o},h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}^{\ast
}(Z_{2},g_{o},h_{o})[u_{g_{n}}^{\ast}])\right\} \right\vert =o_{p}(n^{-1/2}),
\label{PL-SA-L2-15}%
\end{equation}
which proves condition (21) in Assumption 3.4.(iii). Finally, Assumption
3.4.(iv) in HLR follows from Assumptions \ref{L-SA-4}.(vi)-(vii).
\end{proof}
\section{Some Auxiliary Lemmas for Theorem 5.1 of HLR}
For the completeness of this section, we list the sufficient conditions of
Theorem 5.1 in HLR. To facilitate the presentation, we first review some
notations introduced in Section 5 and Appendix D of HLR. Recall that the basis
functions used in the first-step and second-step M estimations are $L\times
1$\ vector $R(x)$ and $K\times1$\ vector $P(\varepsilon)$ respectively. For
$j=1,2$, we define $\upsilon_{j,K}=\sup_{\varepsilon\in\mathcal{E}_{\eta}%
}\left\Vert \partial^{j}P(\varepsilon)^{\prime}\beta_{o,K}\right\Vert $, where
$\mathcal{E}_{\eta}=[a-\eta,b+\eta]$ for some $a<b$ and $\eta>0$, and $\beta_{o,K}\in%
%TCIMACRO{\U{211d} }%
%BeginExpansion
\mathbb{R}
%EndExpansion
^{K}$ is defined in Assumption \ref{L-A-1}.(iii) below. Let $\mathcal{N}%
_{g,n}=\{g\in\mathcal{G}_{n}:\left\Vert g-g_{o}\right\Vert _{2}\leq
\delta_{2,n}^{\ast}\log(\log(n))\}$ denote the local neighborhood of $g_{o}$,
where $\mathcal{G}_{n}$ denotes the sieve space of estimating $g_{o}$,
$\delta_{2,n}^{\ast}=K^{1/2}n^{-1/2}+K^{-\rho_{g}}+\upsilon_{1,K}\delta
_{h,n}^{\ast}$ and $\delta_{h,n}^{\ast}=L^{1/2}n^{-1/2}+L^{-\rho_{h}}$. For
any column vector $a$, let $\Vert a\Vert$ denote its $\ell_{2}$-norm; for any
square matrix $A$, the operator norm is denoted by $||A||$; $\omega_{\max}(A)$
and $\omega_{\min}(A)$ denote the largest and smallest eigenvalues of a square
matrix $A$, respectively. We use $C$ to denote some generic finite positive
constant larger than 1.\ For $d$ a nonnegative integer, let $\left\vert
g\right\vert _{d}=\max_{|\tau|\leq d}\sup_{\varepsilon\in\mathcal{E}%
}\left\vert \partial^{\tau}g(\varepsilon)\right\vert $ for any $g\in
\mathcal{G}$ where $\mathcal{G}$ is the function space containing $g_{o}$. Let
$\left\Vert \cdot\right\Vert _{\infty}$ denote the uniform norm. For any
function $f$, $\mu_{n}(f)=n^{-1}\sum_{i=1}^{n}\left[ f(Z_{i})-\mathbb{E}%
\left[ f(Z_{i})\right] \right] $ denotes the empirical process indexed by
$f$.
\begin{assumption}
\label{L-A-0} (i) The data $\left\{ y_{i},x_{i},s_{i}\right\} _{i=1}^{n}$ is
i.i.d.; (ii) $\mathbb{E}\left[ \left. \varepsilon_{i}^{4}\right\vert
x_{i}\right] <C$ and $\mathbb{E}\left[ \left. \varepsilon_{i}^{2}\right\vert
x_{i}\right] >C^{-1}$; (iii)\ there exist $\rho_{h}>0$ and $\gamma_{o,L}\in%
%TCIMACRO{\U{211d} }%
%BeginExpansion
\mathbb{R}
%EndExpansion
^{L}$ such that%
\[
\left\Vert h_{o,L}-h_{o}\right\Vert _{\infty}=O(L^{-\rho_{h}})
\]
where $h_{o,L}\left( \cdot\right) \equiv R\left( \cdot\right) ^{\prime
}\gamma_{o,L}$; (iv) the eigenvalues of $Q_{L}$ are between $C^{-1}$ and $C$
for all $L$; (v) there exists a nondecreasing sequence $\zeta_{L}$ such that
$\sup_{x\in\mathcal{X}}\left\Vert R(x)\right\Vert \leq\zeta_{L}$.
\end{assumption}
\begin{assumption}
\label{L-A-1} (i) $\mathbb{E}[\left. u_{i}^{4}\right\vert \varepsilon_{i}]<C$ and $\mathbb{E}[\left. u_{i}^{2}\right\vert \varepsilon_{i}]>C^{-1}$; (ii)
$g_{o}(\varepsilon)$ is twice continuously differentiable; (iii) there exist
$\rho_{g}>0$ and $\beta_{o,K}\in%
%TCIMACRO{\U{211d} }%
%BeginExpansion
\mathbb{R}
%EndExpansion
^{K}$ such that%
\[
\left\vert g_{o,K}-g_{o}\right\vert _{d}=O(K^{-\rho_{g}})
\]
where $g_{o,K}\left( \cdot\right) =P\left( \cdot\right) ^{\prime}%
\beta_{o,K}$ and $d=1$; (iv) the eigenvalues of $Q_{K}$ are between $C^{-1}$
and $C$ for all $K$; (v) for $j=0,1,2$, there exists a nondecreasing sequence
$\xi_{j,K}$ such that $\sup_{\varepsilon\in\mathcal{E}_{\eta}}\left\Vert
\partial^{j}P(\varepsilon)\right\Vert \leq\xi_{j,K}$.
\end{assumption}
\begin{assumption}
\label{L-A-2} (i)\ $||v_{g_{n}}^{\ast}||_{2}\geq C$ for all $n$;\ (ii) the
functional $\rho(\cdot)$ satisfies
\[
\sup_{g\in\mathcal{N}_{g,n}}\left\vert \frac{\rho(g)-\rho(g_{o})-\partial
\rho(g_{o})[g-g_{o}]}{\left\Vert v_{n}^{\ast}\right\Vert _{sd}}\right\vert
=o(n^{-1/2});
\]
(iii) $\left\vert \left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-1}\partial
\rho(g_{o})[g_{o,n}-g_{o}]\right\vert =o(n^{-1/2})$; (iv) $\sup_{g\in
\mathcal{N}_{g,n}}\left\Vert \partial\rho(g)[P]-\partial\rho(g_{o}%
)[P]\right\Vert =o(1)$.
\end{assumption}
\begin{assumption}
\label{L-A-3} The following conditions hold:
(i)\ $n^{-1/2}(K+L)^{1/2}(\xi_{0,K}+\zeta_{L})(\log(n))^{1/2}=o(1)$;
(ii)\ $n^{-1}(L\xi_{1,K}^{2}\log(n)+\zeta_{L}\xi_{1,K})=o(1)$;
(iii)\ $n^{-1/2}\zeta_{L}(L\xi_{2,K}+L^{1/2}\xi_{1,K})(n^{-1/2}K^{1/2}%
+K^{-\rho_{g}}+\upsilon_{1,K}n^{-1/2}L^{1/2})\log(n)=o(1)$;
(iv)\ $n^{-1/2}\zeta_{L}(L+L^{1/2}\upsilon_{1,K}+L\upsilon_{2,K})\log(n)=o(1)$;
(v)\ $nL^{1-2\rho_{h}}+K^{-\rho_{g}}=o(1)$.
\end{assumption}
\begin{assumption}
\label{L-A-4} The following conditions hold:
(i) $||v_{g_{n}}^{\ast}||_{2}\leq C$ for all $n$;
(ii) $(n^{-1}K\xi_{1,K}^{2}+(\zeta_{L}^{2}+\xi_{0,K}^{2}+\xi_{1,K}%
^{2})K^{-2\rho_{g}})\log(n)=o(1)$;
(iii) $n^{-1}(\zeta_{L}^{2}+\xi_{0,K}^{2}+\xi_{1,K}^{2})\upsilon_{1,K}%
^{2}L\log(n)=o(1)$.
\end{assumption}
\begin{lemma}
\label{AP-AL-1} Under Assumptions \ref{L-A-0}, \ref{L-A-1}.(iv)-(v),
\ref{L-A-3}.(i) and \ref{L-A-3}.(v), we have%
\[
\left\Vert \widehat{Q}_{n,K}-Q_{K}\right\Vert =O_{p}(\xi_{1,K}^{2}\delta
_{h,n}^{\ast2}+\xi_{1,K}\delta_{h,n}^{\ast}+n^{-1/2}\xi_{0,K}(\log K)^{1/2}),
\]
where $\delta_{h,n}^{\ast}=L^{1/2}n^{-1/2}+L^{-\rho_{h}}$.
\end{lemma}
\begin{proof}
[Proof of Lemma \ref{AP-AL-1}]Let $B_{K}=\{ \lambda_{K}\in%
%TCIMACRO{\U{211d} }%
%BeginExpansion
\mathbb{R}
%EndExpansion
^{K}:\lambda_{K}^{\prime}\lambda_{K}=1\}$. Under Assumptions \ref{L-A-0}.(i),
\ref{L-A-1}.(iv)-(v) and \ref{L-A-3}.(i), we can invoke Lemma 6.2 of Belloni
et al.\ (2015) to get%
\begin{equation}
\sup_{\lambda_{K}\in B_{K}}\left\vert n^{-1}\sum_{i=1}^{n}\left[ \left\vert
\lambda_{K}^{\prime}P(\varepsilon_{i})\right\vert ^{2}\right] -\mathbb{E}%
\left[ \left\vert \lambda_{K}^{\prime}P(\varepsilon_{i})\right\vert
^{2}\right] \right\vert =O_{p}(n^{-1/2}\xi_{0,K}(\log K)^{1/2}),
\label{P-AP-AL1-0}%
\end{equation}
which (together with Assumption \ref{L-A-3}.(i)) further implies that%
\begin{equation}
\left\Vert Q_{n,K}-Q_{K}\right\Vert =o_{p}(1). \label{P-AP-AL1-0A}%
\end{equation}
Under Assumptions \ref{L-A-0} and \ref{L-A-3}.(i), arguments in the proof of
Theorem 4.1 in Belloni et al.\ (2015) show that%
\begin{equation}
\left\Vert \widehat{\gamma}_{n}-\gamma_{o,L}\right\Vert =O_{p}(\delta
_{h,n}^{\ast}), \label{P-AP-AL1-1A}%
\end{equation}
which together with Assumptions \ref{L-A-0}.(iii)-(iv), and (\ref{P-L3-AP-8})
below (which is proved under Assumptions \ref{L-A-0} and \ref{L-A-3}.(i))
implies that
\begin{align}
n^{-1}\sum_{i=1}^{n}\left[ \left\vert \widehat{h}_{n}(x_{i})-h_{o}%
(x_{i})\right\vert ^{2}\right] & \leq2n^{-1}\sum_{i=1}^{n}\left[
\left\vert \widehat{h}_{n}(x_{i})-h_{o,L}(x_{i})\right\vert ^{2}\right]
+2n^{-1}\sum_{i=1}^{n}\left[ \left\vert h_{o,L}(x_{i})-h_{o}(x_{i}%
)\right\vert ^{2}\right] \nonumber\\
& =2(\widehat{\gamma}_{n}-\gamma_{o,L})^{\prime}Q_{n,L}(\widehat{\gamma}%
_{n}-\gamma_{o,L})+O(L^{-2\rho_{h}})\nonumber\\
& \leq2\omega_{\max}(Q_{n,L})\left\Vert \widehat{\gamma}_{n}-\gamma
_{o,L}\right\Vert ^{2}+O(L^{-2\rho_{h}})=O_{p}(\delta_{h,n}^{\ast2}).
\label{P-AP-AL1-1}%
\end{align}
Then by (\ref{P-AP-AL1-1}), and the definition of $\widehat{\varepsilon}_{i}$,%
\begin{equation}
n^{-1}\sum_{i=1}^{n}\left[ \left\vert \widehat{\varepsilon}_{i}%
-\varepsilon_{i}\right\vert ^{2}\right] =n^{-1}\sum_{i=1}^{n}\left[
\left\vert \widehat{h}_{n}(x_{i})-h_{o}(x_{i})\right\vert ^{2}\right]
=O_{p}(\delta_{h,n}^{\ast2}). \label{P-AP-AL1-2}%
\end{equation}
Using (\ref{P-AP-AL1-1A}), Assumptions \ref{L-A-0}.(iii), (v) and
\ref{L-A-3}.(i), \ref{L-A-3}.(v), we have%
\begin{align}
\left\Vert \widehat{h}_{n}-h_{o}\right\Vert _{\infty} & \leq\left\Vert
\widehat{h}_{n}-h_{o,L}\right\Vert _{\infty}+\left\Vert h_{o,L}-h_{o}%
\right\Vert _{\infty}\nonumber\\
& =\left\Vert R(x)^{\prime}(\widehat{\gamma}_{n}-\gamma_{o,L})\right\Vert
_{\infty}+O(L^{-\rho_{h}})\nonumber\\
& \leq\zeta_{L}\left\Vert \widehat{\gamma}_{n}-\gamma_{o,L}\right\Vert
+O(L^{-\rho_{h}})=O_{p}(\zeta_{L}\delta_{h,n}^{\ast}), \label{P-AP-AL1-3}%
\end{align}
which implies that%
\begin{equation}
\max_{i\leq n}\left\vert \widehat{\varepsilon}_{i}-\varepsilon_{i}\right\vert
=\max_{i\leq n}\left\vert \widehat{h}_{n}(x_{i})-h_{o}(x_{i})\right\vert
\leq\left\Vert \widehat{h}_{n}-h_{o}\right\Vert _{\infty}=o_{p}(1).
\label{P-AP-AL1-4}%
\end{equation}
For any $\lambda_{K}\in B_{K}$, by the mean value expansion, the triangle
inequality and the Cauchy-Schwarz inequality,%
\begin{align}
\left\vert \left\vert \lambda_{K}^{\prime}P(\widehat{\varepsilon}%
_{i})\right\vert ^{2}-\left\vert \lambda_{K}^{\prime}P(\varepsilon
_{i})\right\vert ^{2}\right\vert & \leq\left\vert \lambda_{K}^{\prime
}(P(\widehat{\varepsilon}_{i})-P(\varepsilon_{i}))\right\vert ^{2}+2\left\vert
\lambda_{K}^{\prime}(P(\widehat{\varepsilon}_{i})-P(\varepsilon_{i}%
))\lambda_{K}^{\prime}P(\varepsilon_{i})\right\vert \nonumber\\
& =\left\vert \lambda_{K}^{\prime}\partial P(\widetilde{\varepsilon}%
_{i})(\widehat{\varepsilon}_{i}-\varepsilon_{i})\right\vert ^{2}+2\left\vert
\lambda_{K}^{\prime}\partial P(\widetilde{\varepsilon}_{i})\lambda_{K}%
^{\prime}P(\varepsilon_{i})(\widehat{\varepsilon}_{i}-\varepsilon
_{i})\right\vert \nonumber\\
& \leq\left\Vert \partial P(\widetilde{\varepsilon}_{i})\right\Vert
^{2}\left\vert \widehat{\varepsilon}_{i}-\varepsilon_{i}\right\vert
^{2}+2\left\Vert \partial P(\widetilde{\varepsilon}_{i})\right\Vert \left\vert
\lambda_{K}^{\prime}P(\varepsilon_{i})(\widehat{\varepsilon}_{i}%
-\varepsilon_{i})\right\vert \label{P-AP-AL1-5}%
\end{align}
where\ $\widetilde{\varepsilon}_{i}$ is between $\widehat{\varepsilon}_{i}$
and $\varepsilon_{i}$ for each $\lambda_{K}\in\mathbb{R}^{K}$. By (\ref{P-AP-AL1-2}),
Assumption \ref{L-A-1}.(v) and $\widetilde{\varepsilon}_{i}\in\mathcal{E}%
_{\eta}$ for all $i\leq n$ wpa1\ (which is implied by (\ref{P-AP-AL1-4})),
\begin{equation}
\frac{\max_{i\leq n}\left\Vert \partial P(\widetilde{\varepsilon}%
_{i})\right\Vert ^{2}}{n}\sum_{i=1}^{n}\left\vert \widehat{\varepsilon}%
_{i}-\varepsilon_{i}\right\vert ^{2}=O_{p}(\xi_{1,K}^{2}\delta_{h,n}^{\ast2}).
\label{P-AP-AL1-6}%
\end{equation}
By the Cauchy-Schwarz inequality,
\begin{align}
& \sup_{\lambda_{K}\in B_{K}}\frac{\max_{i\leq n}\left\Vert \partial
P(\widetilde{\varepsilon}_{i})\right\Vert }{n}\sum_{i=1}^{n}\left\vert
\lambda_{K}^{\prime}P(\varepsilon_{i})(\widehat{\varepsilon}_{i}%
-\varepsilon_{i})\right\vert \nonumber\\
& \leq\sup_{\lambda_{K}\in B_{K}}\max_{i\leq n}\left\Vert \partial
P(\widetilde{\varepsilon}_{i})\right\Vert \left( n^{-1}\sum_{i=1}%
^{n}\left\vert \widehat{\varepsilon}_{i}-\varepsilon_{i}\right\vert
^{2}\right) ^{1/2}\left( n^{-1}\sum_{i=1}^{n}\left\vert \lambda_{K}^{\prime
}P(\varepsilon_{i})\right\vert ^{2}\right) ^{1/2}\nonumber\\
& =O_{p}(\xi_{1,K}\delta_{h,n}^{\ast}), \label{P-AP-AL1-7}%
\end{align}
where the equality is by (\ref{P-AP-AL1-6}) and $\sup_{\lambda_{K}\in B_{K}%
}n^{-1}\sum_{i=1}^{n}\left\vert \lambda_{K}^{\prime}P(\varepsilon
_{i})\right\vert ^{2}=O_{p}(1)$ which is implied by (\ref{P-AP-AL1-0}),
$\xi_{0,K}(\log K)^{1/2}n^{-1/2}=o(1)$ and $\sup_{\lambda_{K}\in B_{K}%
}\mathbb{E}\left[ \left\vert \lambda_{K}^{\prime}P(\varepsilon)\right\vert
^{2}\right] \leq\omega_{\max}(Q_{K})\leq C$. By (\ref{P-AP-AL1-5}),
(\ref{P-AP-AL1-6}) and (\ref{P-AP-AL1-7}),
\begin{equation}
\sup_{\lambda_{K}\in B_{K}}\left\vert n^{-1}\sum_{i=1}^{n}\left[ \left\vert
\lambda_{K}^{\prime}P(\widehat{\varepsilon}_{i})\right\vert ^{2}\right]
-n^{-1}\sum_{i=1}^{n}\left[ \left\vert \lambda_{K}^{\prime}P(\varepsilon
_{i})\right\vert ^{2}\right] \right\vert =O_{p}(\xi_{1,K}^{2}\delta
_{h,n}^{\ast2}+\xi_{1,K}\delta_{h,n}^{\ast}) \label{P-AP-AL1-8}%
\end{equation}
which together with (\ref{P-AP-AL1-0}) proves the claim of the Lemma.
\end{proof}
\bigskip
\begin{lemma}
\label{AP-AL-2} Suppose that Assumptions \ref{L-A-0}, \ref{L-A-1},
\ref{L-A-3}.(i)-(ii) and \ref{L-A-3}.(v)\ hold. Then we have%
\[
\left\Vert \widehat{\beta}_{n}-\beta_{o,K}\right\Vert =O_{p}(K^{1/2}%
n^{-1/2}+K^{-\rho_{g}}+\upsilon_{1,K}\delta_{h,n}^{\ast}),
\]
where $\upsilon_{1,K}=\sup_{\varepsilon\in\mathcal{E}_{\eta}}\left\vert
\partial P(\varepsilon)^{\prime}\beta_{o,K}\right\vert $.
\end{lemma}
\begin{proof}
[Proof of Lemma \ref{AP-AL-2}]Let $G_{n}=\left[ g_{o}(\varepsilon_{1}%
),\ldots,g_{o}(\varepsilon_{n})\right] ^{\prime}$, $\widehat{G}_{K,n}=\left[
g_{o,K}(\widehat{\varepsilon}_{1}),\ldots,g_{o,K}(\widehat{\varepsilon}%
_{n})\right] ^{\prime}$ and $U_{n}=\left[ u_{1},\ldots,u_{n}\right]
^{\prime}$. By definition,
\begin{equation}
\widehat{\beta}_{n}=n^{-1}\widehat{Q}_{n,K}^{-1}\widehat{P}_{n}^{\prime}%
(G_{n}+U_{n})=\beta_{o,K}+n^{-1}\widehat{Q}_{n,K}^{-1}\widehat{P}_{n}^{\prime
}\left[ (G_{n}-G_{K,n})+(G_{K,n}-\widehat{G}_{K,n})+U_{n}\right] ,
\label{P-AP-AL2-1}%
\end{equation}
where $\widehat{Q}_{n,K}=n^{-1}\widehat{P}_{n}^{\prime}\widehat{P}_{n}\ $and
$G_{K,n}=\left[ g_{o,K}(\varepsilon_{1}),\ldots,g_{o,K}(\varepsilon
_{n})\right] ^{\prime}$. By Assumptions \ref{L-A-3}.(ii) and \ref{L-A-3}.(v),
$\xi_{1,K}\delta_{h,n}^{\ast}=o(1)$ which together with Assumption
\ref{L-A-3}.(i) and Lemma \ref{AP-AL-1} implies that%
\begin{equation}
(2C)^{-1}<\omega_{\min}(\widehat{Q}_{n,K})\leq\omega_{\max}(\widehat{Q}%
_{n,K})<2C\text{ wpa1.} \label{P-AP-AL2-2}%
\end{equation}
By (\ref{P-AP-AL2-2}) and Assumption \ref{L-A-1}.(iii),%
\begin{align}
& n^{-2}(G_{n}-G_{K,n})^{\prime}\widehat{P}_{n}\widehat{Q}_{n,K}^{-2}%
\widehat{P}_{n}^{\prime}(G_{n}-G_{K,n})\nonumber\\
& \leq\omega_{\min}^{-1}(\widehat{Q}_{n,K})n^{-2}(G_{n}-G_{K,n})^{\prime
}\widehat{P}_{n}\widehat{Q}_{n,K}^{-1}\widehat{P}_{n}^{\prime}(G_{n}%
-G_{K,n})\nonumber\\
& =\omega_{\min}^{-1}(\widehat{Q}_{n,K})n^{-1}(G_{n}-G_{K,n})^{\prime
}\widehat{P}_{n}(\widehat{P}_{n}^{\prime}\widehat{P}_{n})^{-1}\widehat{P}%
_{n}^{\prime}(G_{n}-G_{K,n})\nonumber\\
& \leq O_{p}(1)n^{-1}\sum_{i=1}^{n}\left[ \left\vert g_{o}(\varepsilon
_{i})-g_{o,K}(\varepsilon_{i})\right\vert ^{2}\right] =O_{p}(K^{-2\rho_{g}}),
\label{P-AP-AL2-3}%
\end{align}
where the first equality is by the definition of $\widehat{Q}_{n,K}$, the
second inequality is by the fact that $\widehat{P}_{n}(\widehat{P}_{n}%
^{\prime}\widehat{P}_{n})^{-1}\widehat{P}_{n}^{\prime}$ is an idempotent
matrix. Similarly%
\begin{align}
& n^{-2}(G_{K,n}-\widehat{G}_{K,n})^{\prime}\widehat{P}_{n}\widehat{Q}%
_{n,K}^{-2}\widehat{P}_{n}^{\prime}(G_{K,n}-\widehat{G}_{K,n})\nonumber\\
& \leq O_{p}(1)n^{-1}(G_{K,n}-\widehat{G}_{K,n})^{\prime}\widehat{P}%
_{n}(\widehat{P}_{n}^{\prime}\widehat{P}_{n})^{-1}\widehat{P}_{n}^{\prime
}(G_{K,n}-\widehat{G}_{K,n})\nonumber\\
& \leq O_{p}(1)n^{-1}\sum_{i=1}^{n}\left[ \left\vert g_{o,K}(\varepsilon
_{i})-g_{o,K}(\widehat{\varepsilon}_{i})\right\vert ^{2}\right] .
\label{P-AP-AL2-4}%
\end{align}
By the mean value expansion and the Cauchy-Schwarz inequality,%
\begin{equation}
\left\vert g_{o,K}(\varepsilon_{i})-g_{o,K}(\widehat{\varepsilon}%
_{i})\right\vert =\left\vert \partial P(\widetilde{\varepsilon}_{i})^{\prime
}\beta_{o,K}(\widehat{\varepsilon}_{i}-\varepsilon_{i})\right\vert \leq
\max_{i\leq n}\left\vert \partial P(\widetilde{\varepsilon}_{i})^{\prime}%
\beta_{o,K}\right\vert \left\vert \widehat{\varepsilon}_{i}-\varepsilon
_{i}\right\vert , \label{P-AP-AL2-5}%
\end{equation}
where $\widetilde{\varepsilon}_{i}$ is between $\varepsilon_{i}$ and
$\widehat{\varepsilon}_{i}$. Using (\ref{P-AP-AL2-5}), we get%
\begin{equation}
n^{-1}\sum_{i=1}^{n}\left[ \left\vert g_{o,K}(\varepsilon_{i})-g_{o,K}%
(\widehat{\varepsilon}_{i})\right\vert ^{2}\right] \leq\max_{i\leq
n}\left\vert \partial P(\widetilde{\varepsilon}_{i})^{\prime}\beta
_{o,K}\right\vert ^{2}n^{-1}\sum_{i=1}^{n}\left[ \left\vert \widehat
{\varepsilon}_{i}-\varepsilon_{i}\right\vert ^{2}\right] =O_{p}%
(\upsilon_{1,K}^{2}\delta_{h,n}^{\ast2}), \label{P-AP-AL2-6}%
\end{equation}
where the equality is by (\ref{P-AP-AL1-2}) and $\max_{i\leq n}\left\vert
\partial P(\widetilde{\varepsilon}_{i})^{\prime}\beta_{o,K}\right\vert
^{2}=O_{p}(\upsilon_{1,K}^{2})$ which is implied by the definition of
$\upsilon_{1,K}$ and $\widetilde{\varepsilon}_{i}\in\mathcal{E}_{\eta}$ for
all $i\leq n$ wpa1 (which is implied by (\ref{P-AP-AL1-4})). Combining the
results in (\ref{P-AP-AL2-4}) and (\ref{P-AP-AL2-6}), we get%
\begin{equation}
n^{-2}(G_{K,n}-\widehat{G}_{K,n})^{\prime}\widehat{P}_{n}\widehat{Q}%
_{n,K}^{-2}\widehat{P}_{n}^{\prime}(G_{K,n}-\widehat{G}_{K,n})=O_{p}%
(\upsilon_{1,K}^{2}\delta_{h,n}^{\ast2}). \label{P-AP-AL2-7}%
\end{equation}
By Assumptions \ref{L-A-0}.(i) and \ref{L-A-1}.(i)
\begin{align}
& \mathbb{E}\left[ \left. n^{-2}U_{n}^{\prime}\widehat{P}_{n}\widehat
{Q}_{n,K}^{-1}\widehat{P}_{n}^{\prime}U_{n}\right\vert \{x_{i},s_{i}%
\}_{i=1}^{n}\right] \nonumber\\
& =tr\left( n^{-2}\widehat{P}_{n}\widehat{Q}_{n,K}^{-1}\widehat{P}%
_{n}^{\prime}\mathbb{E}\left[ \left. U_{n}U_{n}^{\prime}\right\vert
\{x_{i},s_{i}\}_{i=1}^{n}\right] \right) \nonumber\\
& \leq\frac{C}{n}tr\left( \widehat{Q}_{n,K}^{-1}\widehat{P}_{n}^{\prime
}\widehat{P}_{n}/n\right) =O(Kn^{-1}) \label{P-AP-AL2-8}%
\end{align}
which together with (\ref{P-AP-AL2-2}) and the Markov inequality implies that%
\begin{equation}
n^{-2}U_{n}^{\prime}\widehat{P}_{n}\widehat{Q}_{n,K}^{-2}\widehat{P}%
_{n}^{\prime}U_{n}=O_{p}(Kn^{-1}). \label{P-AP-AL2-9}%
\end{equation}
Collecting the results in (\ref{P-AP-AL2-1}), (\ref{P-AP-AL2-3}),
(\ref{P-AP-AL2-7}) and (\ref{P-AP-AL2-9}), we prove the claim of the Lemma.
\end{proof}
\bigskip
\begin{lemma}
\label{L-AP-1} Suppose that Assumptions \ref{L-A-0}, \ref{L-A-1}, \ref{L-A-2}
and \ref{L-A-3} hold. Then Assumption 3.1 in HLR holds.
\end{lemma}
\begin{proof}
[Proof of Lemma \ref{L-AP-1}]By the definition of $\left\Vert v_{n}^{\ast
}\right\Vert _{sd}^{2}$, Assumptions \ref{L-A-1}.(i) and \ref{L-A-2}.(i),
\begin{equation}
\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2}=\left\Vert v_{\Gamma_{n}}^{\ast
}(x)\varepsilon\right\Vert _{2}^{2}+\left\Vert v_{g_{n}}^{\ast}(\varepsilon
)u\right\Vert _{2}^{2}\geq\left\Vert v_{g_{n}}^{\ast}(\varepsilon)u\right\Vert
_{2}^{2}\geq C^{-1}\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}^{2}\geq C^{-1}
\label{P-LAP-1}%
\end{equation}
for all $n$, which verifies Assumption 3.1.(i) in HLR. Assumption 3.1.(ii) in
HLR is directly assumed in Assumption \ref{L-A-2}.(ii). By Lemma
\ref{AP-AL-2}, we know that $\delta_{2,n}^{\ast}=n^{-1/2}K^{1/2}+K^{-\rho_{g}%
}+\upsilon_{1,K}\delta_{h,n}^{\ast}$, where $\upsilon_{1,K}=\sup
_{\varepsilon\in\mathcal{E}_{\eta}}\left\vert \partial P(\varepsilon)^{\prime
}\beta_{o,K}\right\vert $. Let $g_{n}=g_{o,K}$, then by Assumption
\ref{L-A-1}.(iii), we have $\left\Vert g_{n}-g_{o}\right\Vert _{2}%
=O(K^{-\rho_{g}})=O(\delta_{2,n}^{\ast})$. By the definitions of $\left\Vert
\cdot\right\Vert _{\varphi}$ and $\left\Vert \cdot\right\Vert _{\psi}$,\ we
can set $c_{\varphi}=1$ and $c_{\psi}=1$ such that $\left\Vert v_{h}%
\right\Vert _{\varphi}\leq c_{\varphi}\left\Vert v_{h}\right\Vert
_{\mathcal{H}}$ and $\left\Vert v_{g}\right\Vert _{\psi}\leq c_{\psi
}\left\Vert v_{g}\right\Vert _{\mathcal{G}}$ for any $v_{h}\in\mathcal{V}_{1}$
and $v_{g}\in\mathcal{V}_{2}$.\ This verifies Assumption 3.1.(iii) in HLR.
Assumption 3.1.(iv) in HLR is assumed in Assumption \ref{L-A-2}.(iii).
\end{proof}
\bigskip
\begin{lemma}
\label{L-AP-2} Suppose that Assumptions \ref{L-A-0}, \ref{L-A-1}, \ref{L-A-2}
and \ref{L-A-3} hold. Then Assumption 3.2 in HLR holds.
\end{lemma}
\begin{proof}
[Proof of Lemma \ref{L-AP-2}]For ease of notation, we define $\varepsilon
_{h}=s-h(x)$. By definition,%
\begin{align}
& \psi(Z_{2},g^{\ast},h)-\psi(Z_{2},g,h)-\Delta_{\psi}(Z_{2},g,h)[\pm
\kappa_{n}u_{g_{n}}^{\ast}]\nonumber\\
& =-\frac{1}{2}\left[ \left\vert y-g(\varepsilon_{h})\mp\kappa_{n}u_{g_{n}%
}^{\ast}(\varepsilon)\right\vert ^{2}\right] +\frac{1}{2}\left[ \left\vert
y-g(\varepsilon_{h})\right\vert ^{2}\right] -\left[ y-g(\varepsilon
_{h})\right] (\pm\kappa_{n}u_{g_{n}}^{\ast})\nonumber\\
& =-\frac{1}{2}\kappa_{n}^{2}(u_{g_{n}}^{\ast}(\varepsilon))^{2}.
\label{P-L2-AP-1}%
\end{align}
By Assumption \ref{L-A-1}.(i),
\begin{equation}
\mathbb{E}\left[ (u_{g_{n}}^{\ast}(\varepsilon))^{2}\right] =\frac
{\mathbb{E}\left[ |v_{g_{n}}^{\ast}(\varepsilon)|^{2}\right] }{\left\Vert
v_{\Gamma_{n}}^{\ast}(x)\varepsilon\right\Vert _{2}^{2}+\left\Vert v_{g_{n}%
}^{\ast}(\varepsilon)u\right\Vert _{2}^{2}}\leq\frac{\mathbb{E}\left[
|v_{g_{n}}^{\ast}(\varepsilon)|^{2}\right] }{\left\Vert v_{\Gamma_{n}}%
^{\ast}(x)\varepsilon\right\Vert _{2}^{2}+C^{-1}\left\Vert v_{g_{n}}^{\ast
}(\varepsilon)\right\Vert _{2}^{2}}\leq C \label{P-L2-AP-2}%
\end{equation}
which together with the Markov inequality, Assumption \ref{L-A-0}.(i) and
(\ref{P-L2-AP-1}) verifies Assumption 3.2.(i) in HLR.
By definition,
\begin{equation}
\Delta_{\psi}(Z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},g_{o}%
,h_{o})[u_{g_{n}}^{\ast}]=\left( g_{o}(\varepsilon)-g(\varepsilon
_{h})\right) u_{g_{n}}^{\ast}. \label{P-L2-AP-3}%
\end{equation}
Recall that $\mathcal{N}_{h,n}=\{h\in\mathcal{H}_{n}:\left\Vert h-h_{o}%
\right\Vert _{2}\leq\delta_{1,n}\}$, where $\delta_{1,n}=\delta_{h,n}^{\ast
}\log(\log(n))$. It is clear that for any $h(\cdot)=R(\cdot)^{\prime}%
\gamma_{L}\in\mathcal{N}_{h,n}$, we have%
\begin{align}
\left\Vert h-h_{o}\right\Vert _{\infty} & \leq\left\Vert h-h_{o,L}%
\right\Vert _{\infty}+\left\Vert h_{o,L}-h_{o}\right\Vert _{\infty}\nonumber\\
& \leq\left\Vert R(x)^{\prime}(\gamma_{L}-\gamma_{o,L})\right\Vert _{\infty
}+CL^{-\rho_{h}}\nonumber\\
& \leq\zeta_{L}\left\Vert \gamma_{L}-\gamma_{o,L}\right\Vert +CL^{-\rho_{h}%
}\nonumber\\
& \leq\zeta_{L}\omega_{\min}^{-1/2}(Q_{L})((\gamma_{L}-\gamma_{o,L})^{\prime
}Q_{L}(\gamma_{L}-\gamma_{o,L}))^{1/2}+CL^{-\rho_{h}}\nonumber\\
& =\zeta_{L}\omega_{\min}^{-1/2}(Q_{L})\left\Vert h-h_{o,L}\right\Vert
_{2}+CL^{-\rho_{h}}\nonumber\\
& \leq\zeta_{L}\omega_{\min}^{-1/2}(Q_{L})\left[ \left\Vert h-h_{o}%
\right\Vert _{2}+\left\Vert h_{o,L}-h_{o}\right\Vert _{2}\right]
+CL^{-\rho_{h}}\leq C\zeta_{L}\delta_{1,n} \label{P-L2-AP-3A}%
\end{align}
where the last inequality is by Assumption \ref{L-A-0}.(iii)-(iv) and the
definition of $\delta_{1,n}$. Define%
\[
\mathcal{F}_{n}=\left\{ f(s,x,h,g):f(s,x,h,g)=\left( g_{o}(\varepsilon
)-g(\varepsilon_{h})\right) u_{g_{n}}^{\ast}(\varepsilon)\text{, }%
g\in\mathcal{N}_{g,n}\text{, }h\in\mathcal{N}_{h,n}\right\} ,
\]
where $\mathcal{N}_{g,n}=\{g\in\mathcal{G}_{n}:\left\Vert g-g_{o}\right\Vert
_{2}\leq\delta_{2,n}\}$ and $\delta_{2,n}=\delta_{2,n}^{\ast}\log(\log(n))$.
By Assumptions \ref{L-A-3}.(i) and \ref{L-A-3}.(v), $\zeta_{L}\delta
_{1,n}=o(1)$. Hence by (\ref{P-L2-AP-3A}) we can let $n$ sufficiently large
such that $\zeta_{L}\delta_{1,n}<\eta/2$ and $\varepsilon_{h}\in
\mathcal{E}_{\eta}$ for any $h\in\mathcal{N}_{h,n}$. By the mean value
expansion, $g(\varepsilon_{h})-g(\varepsilon)=\partial P(\widetilde
{\varepsilon}_{h})^{\prime}\beta(\varepsilon_{h}-\varepsilon)$ where
$\widetilde{\varepsilon}_{h}$ is between $\varepsilon_{h}$ and $\varepsilon$.
As $\varepsilon_{h}\in\mathcal{E}_{\eta}$ for any $h\in\mathcal{N}_{h,n}$, we
have $\widetilde{\varepsilon}_{h}\in\mathcal{E}_{\eta}$. Hence for any
$g\left( \cdot\right) =P(\cdot)^{\prime}\beta$ with $g\left( \cdot\right)
\in\mathcal{N}_{g,n}$ and any $h\in\mathcal{N}_{h,n}$, we have%
\begin{align}
\left\vert g(\varepsilon_{h})-g(\varepsilon)\right\vert & \leq\left\vert
\partial P(\widetilde{\varepsilon}_{h})^{\prime}(\beta-\beta_{o,K}%
)(\varepsilon_{h}-\varepsilon)\right\vert +\left\vert \partial P(\widetilde
{\varepsilon}_{h})^{\prime}\beta_{o,K}(\varepsilon_{h}-\varepsilon)\right\vert
\nonumber\\
& =\left\vert \partial P(\widetilde{\varepsilon}_{h})^{\prime}(\beta
-\beta_{o,K})(h(x)-h_{o}(x))\right\vert +\left\vert \partial P(\widetilde
{\varepsilon}_{h})^{\prime}\beta_{o,K}(h(x)-h_{o}(x))\right\vert \nonumber\\
& \leq\left[ \left\Vert \partial P(\widetilde{\varepsilon}_{h})\right\Vert
\left\Vert \beta-\beta_{o,K}\right\Vert +\left\vert \partial P(\widetilde
{\varepsilon}_{h})^{\prime}\beta_{o,K}\right\vert \right] \left\Vert
h-h_{o}\right\Vert _{\infty}\nonumber\\
& \leq(\xi_{1,K}\delta_{2,n}+\upsilon_{1,K})\zeta_{L}\delta_{1,n}\leq
(\xi_{1,K}\delta_{1,n}+1)\zeta_{L}\delta_{2,n}\leq C\zeta_{L}\delta_{2,n}
\label{P-L2-AP-4}%
\end{align}
where the first inequality is by the mean value expansion and the triangle
inequality, the equality is by the definitions of $\varepsilon_{h}$ and
$\varepsilon$, the second inequality is by the Cauchy-Schwarz inequality, the
third inequality is by Assumption \ref{L-A-1}.(v), (\ref{P-L2-AP-3A}), the
definitions of $\upsilon_{1,K}$ and $\mathcal{N}_{h,n}$, and
\begin{equation}
\left\Vert \beta-\beta_{o,K}\right\Vert \leq\omega_{\min}^{-1/2}(Q_{K})\left(
\left\Vert g-g_{o}\right\Vert _{2}+\left\Vert g_{o}-g_{o,K}\right\Vert
_{2}\right) \leq C\delta_{2,n} \label{P-L2-AP-4A}%
\end{equation}
which is implied by Assumption \ref{L-A-1}.(iii) and the definition of
$\mathcal{N}_{g,n}$, the fourth inequality is because $\upsilon_{1,K}%
\delta_{1,n}\leq\delta_{2,n}$ by definition, the last inequality in
(\ref{P-L2-AP-4}) is by $\xi_{1,K}\delta_{1,n}=O(1)$ which is implied by
Assumptions \ref{L-A-3}.(ii) and \ref{L-A-3}.(v). By the triangle inequality
and the Cauchy-Schwarz inequality,
\begin{equation}
\left\vert g(\varepsilon)-g_{o}(\varepsilon)\right\vert \leq\left\Vert
\beta-\beta_{o,K}\right\Vert \xi_{0,K}+\left\Vert g_{o}-g_{o,K}\right\Vert
_{\infty}\leq C\xi_{0,K}\delta_{2,n} \label{P-L2-AP-5}%
\end{equation}
where the last inequality is by Assumption \ref{L-A-1}.(iii) and
(\ref{P-L2-AP-4A}). By the definition of $u_{g_{n}}^{\ast}$, Assumptions
\ref{L-A-1}.(iv)-(v) and (\ref{P-LAP-1}),
\begin{equation}
\sup_{\varepsilon\in\mathcal{E}}\left\vert u_{g_{n}}^{\ast}(\varepsilon
)\right\vert ^{2}\leq\frac{\xi_{0,K}^{2}\partial\rho(g_{o})\left[ P\right]
^{\prime}Q_{K}^{-2}\partial\rho(g_{o})\left[ P\right] }{C^{-1}\left\Vert
v_{g_{n}}^{\ast}\right\Vert _{2}^{2}}=\frac{C\xi_{0,K}^{2}\partial\rho
(g_{o})\left[ P\right] ^{\prime}Q_{K}^{-2}\partial\rho(g_{o})\left[
P\right] }{\partial\rho(g_{o})[P]^{\prime}Q_{K}^{-1}\partial\rho
(g_{o})\left[ P\right] }\leq C\xi_{0,K}^{2}. \label{P-L2-AP-5B}%
\end{equation}
Combining the results in (\ref{P-L2-AP-4}), (\ref{P-L2-AP-5}) and
(\ref{P-L2-AP-5B}), we get
\begin{align}
\sup_{f\in\mathcal{F}_{n}}\left\Vert f\right\Vert _{\infty} & \leq\sup
_{g\in\mathcal{N}_{g,n}\text{, }h\in\mathcal{N}_{h,n}\text{, }\varepsilon
\in\mathcal{E}}\left[ \left\vert g(\varepsilon_{h})-g(\varepsilon)\right\vert
+\left\vert g(\varepsilon)-g_{o}(\varepsilon)\right\vert \right]
\sup_{\varepsilon\in\mathcal{E}}\left\vert u_{g_{n}}^{\ast}(\varepsilon
)\right\vert \nonumber\\
& \leq C(\zeta_{L}+\xi_{0,K})\xi_{0,K}\delta_{2,n}\equiv M_{n}.
\label{P-L2-AP-6}%
\end{align}
For any $f\in\mathcal{F}_{n}$, by (\ref{P-L2-AP-4}) and (\ref{P-L2-AP-5}),
\begin{align}
\mathbb{E}\left[ f^{2}\right] & \leq2\mathbb{E}\left[ \left( g(\varepsilon
_{h})-g(\varepsilon)\right) ^{2}(u_{g_{n}}^{\ast}(\varepsilon))^{2}\right]
+2\mathbb{E}\left[ \left( g(\varepsilon)-g_{o}(\varepsilon)\right) ^{2}(u_{g_{n}%
}^{\ast}(\varepsilon))^{2}\right] \nonumber\\
& \leq C(\zeta_{L}^{2}+\xi_{0,K}^{2})\delta_{2,n}^{2}\mathbb{E}\left[
(u_{g_{n}}^{\ast}(\varepsilon))^{2}\right] \leq C(\zeta_{L}^{2}+\xi_{0,K}%
^{2})\delta_{2,n}^{2}\equiv d_{n}^{2} \label{P-L2-AP-7}%
\end{align}
where the last inequality is by (\ref{P-L2-AP-2}). For any $f_{1}%
=f(\cdot,h_{1},g_{1})$ and any $f_{2}=f(\cdot,h_{2},g_{2})$ where $h_{1}%
,h_{2}\in\mathcal{N}_{h,n}$ and $g_{1},g_{2}\in\mathcal{N}_{g,n}$, by the
triangle inequality,
\begin{align}
\left\vert f_{1}-f_{2}\right\vert & \leq\left\vert \left( g_{1}%
(\varepsilon_{h_{1}})-g_{1}(\varepsilon_{h_{2}})\right) u_{g_{n}}^{\ast
}(\varepsilon)\right\vert +\left\vert \left( g_{1}(\varepsilon_{h_{2}}%
)-g_{2}(\varepsilon_{h_{2}})\right) u_{g_{n}}^{\ast}(\varepsilon)\right\vert
\nonumber\\
& \leq\left\vert \partial P(\widetilde{\varepsilon}_{h})^{\prime}\beta
_{1}(\varepsilon_{h_{1}}-\varepsilon_{h_{2}})u_{g_{n}}^{\ast}(\varepsilon
)\right\vert +\xi_{0,K}\left\vert u_{g_{n}}^{\ast}(\varepsilon)\right\vert
\left\Vert \beta_{1}-\beta_{2}\right\Vert \nonumber\\
& =\left\vert \partial P(\widetilde{\varepsilon}_{h})^{\prime}\left[
(\beta_{1}-\beta_{o,K})+\beta_{o,K}\right] (h_{1}(x)-h_{2}(x))u_{g_{n}}%
^{\ast}(\varepsilon)\right\vert +\xi_{0,K}\left\vert u_{g_{n}}^{\ast
}(\varepsilon)\right\vert \left\Vert \beta_{1}-\beta_{2}\right\Vert
\nonumber\\
& \leq\left[ \left\Vert \partial P(\widetilde{\varepsilon}_{h})\right\Vert
\left\Vert \beta_{1}-\beta_{o,K}\right\Vert +\left\vert \partial
P(\widetilde{\varepsilon}_{h})^{\prime}\beta_{o,K}\right\vert \right]
\left\vert R(x)^{\prime}(\gamma_{1}-\gamma_{2})\right\vert \left\vert
u_{g_{n}}^{\ast}(\varepsilon)\right\vert +\xi_{0,K}\left\vert u_{g_{n}}^{\ast
}(\varepsilon)\right\vert \left\Vert \beta_{1}-\beta_{2}\right\Vert
\nonumber\\
& \leq\left[ \xi_{1,K}\delta_{2,n}+\upsilon_{1,K}\right] \zeta
_{L}\left\vert u_{g_{n}}^{\ast}(\varepsilon)\right\vert \left\Vert \gamma
_{1}-\gamma_{2}\right\Vert +\xi_{0,K}\left\vert u_{g_{n}}^{\ast}%
(\varepsilon)\right\vert \left\Vert \beta_{1}-\beta_{2}\right\Vert \nonumber\\
& \leq F_{n}(\varepsilon)(\left\Vert \beta_{1}-\beta_{2}\right\Vert
+\left\Vert \gamma_{1}-\gamma_{2}\right\Vert ), \label{P-L2-AP-8}%
\end{align}
where $F_{n}(\varepsilon)=C(\xi_{1,K}\zeta_{L}\delta_{2,n}+\upsilon_{1,K}%
\zeta_{L}+\xi_{0,K})\left\vert u_{g_{n}}^{\ast}(\varepsilon)\right\vert $, the
equality is by the definitions of $\varepsilon_{h_{1}}$ and $\varepsilon
_{h_{2}}$, the fourth inequality is by $\left\Vert \partial P(\widetilde
{\varepsilon}_{h})\right\Vert \leq\xi_{1,K}$ and $\left\Vert \beta_{1}%
-\beta_{o,K}\right\Vert \leq\delta_{2,n}$ for any $h_{1},h_{2}\in
\mathcal{N}_{h,n}$, and
\begin{equation}
\left\vert R(x)^{\prime}(\gamma_{1}-\gamma_{2})\right\vert \leq\left\Vert
R(x)\right\Vert \left\Vert \gamma_{1}-\gamma_{2}\right\Vert \leq\zeta
_{L}\left\Vert \gamma_{1}-\gamma_{2}\right\Vert \label{P-L2-AP-8A}%
\end{equation}
which is implied by the Cauchy-Schwarz inequality and the definition of $\zeta_{L}$.
By (\ref{P-L2-AP-2}), $\left\Vert F_{n}\right\Vert _{2}\leq C(\xi_{1,K}%
\zeta_{L}\delta_{2,n}+\upsilon_{1,K}\zeta_{L}+\xi_{0,K})\equiv\xi_{F_{n}}$.
Let $H_{[]}\left( u,\mathcal{F}_{n},\left\Vert \cdot\right\Vert _{2}\right)
$ denote the $u$-bracketing number of the function space
$\mathcal{F}_{n}$ under the $L_{2}$-norm. By Example 19.7 in Van der Vaart
(1998), $H_{[]}\left( u\left\Vert F_{n}\right\Vert _{2},\mathcal{F}%
_{n},\left\Vert \cdot\right\Vert _{2}\right) \leq(Cu^{-1})^{L+K}$ for all
$u\in(0,1)$. Hence%
\begin{equation}
J_{[]}\left( d_{n},\mathcal{F}_{n},\left\Vert \cdot\right\Vert _{2}\right)
=\int_{0}^{d_{n}}(\log H_{[]}\left( u,\mathcal{F}_{n},\left\Vert
\cdot\right\Vert _{2}\right) )^{1/2}du\leq C(K+L)^{1/2}(\log(n))^{1/2}d_{n}
\label{P-L2-AP-9}%
\end{equation}
where the inequality is by $d_{n}^{-1}\leq Cn$ and $\xi_{F_{n}}\leq Cn$ which
are implied by Assumption \ref{L-A-3}. By (\ref{P-L2-AP-6}), (\ref{P-L2-AP-7}%
), (\ref{P-L2-AP-9}) and Lemma 19.36 in Van der Vaart (1998),%
\begin{align}
& \mathbb{E}\left[ \sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{
\Delta_{\psi}(Z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},g_{o}%
,h_{o})[u_{g_{n}}^{\ast}]\right\} \right\vert \right] \nonumber\\
& \leq\frac{J_{[]}\left( d_{n},\mathcal{F}_{n},\left\Vert \cdot\right\Vert
_{2}\right) }{n^{1/2}}\left( 1+\frac{J_{[]}\left( d_{n},\mathcal{F}%
_{n},\left\Vert \cdot\right\Vert _{2}\right) }{d_{n}^{2}n^{1/2}}M_{n}\right)
\nonumber\\
& \leq C\frac{(K+L)^{1/2}(\log(n))^{1/2}}{n^{1/2}}d_{n}\left( 1+\frac
{(K+L)^{1/2}(\log(n))^{1/2}}{d_{n}n^{1/2}}M_{n}\right) \nonumber\\
& \leq C\frac{(K+L)^{1/2}(\log(n))^{1/2}d_{n}}{n^{1/2}}\left( 1+\frac
{(K+L)^{1/2}\xi_{0,K}(\log(n))^{1/2}}{n^{1/2}}\right) =o(n^{-1/2})
\label{P-L2-AP-10}%
\end{align}
where the equality is by Assumptions \ref{L-A-3}.(i) and \ref{L-A-3}.(v).
Using (\ref{P-L2-AP-10}) and the Markov inequality, we get
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\left\vert \mu_{n}\left\{ \Delta_{\psi}%
(Z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},g_{o},h_{o})[u_{g_{n}}%
^{\ast}]\right\} \right\vert =o_{p}(n^{-1/2}), \label{P-L2-AP-11}%
\end{equation}
which verifies Assumption 3.2.(ii) in HLR.
By Assumption \ref{L-A-1}.(i), (\ref{P-L2-AP-2}) and $\mathbb{E}\left[
\left. u\right\vert \varepsilon\right] =0$,%
\begin{align}
K_{\psi}(g,h)-K_{\psi}(g^{\ast},h) & =\mathbb{E}\left[ -\frac{1}%
{2}\left\vert y-g(\varepsilon_{h})\right\vert ^{2}\right] -\mathbb{E}\left[
-\frac{1}{2}\left\vert y-g^{\ast}(\varepsilon_{h})\right\vert ^{2}\right]
\nonumber\\
& =\mathbb{E}\left[ -\frac{1}{2}\left\vert y-g(\varepsilon_{h})\right\vert
^{2}\right] -\mathbb{E}\left[ -\frac{1}{2}\left\vert y-g(\varepsilon_{h}%
)\mp\kappa_{n}u_{g_{n}}^{\ast}(\varepsilon)\right\vert ^{2}\right] \nonumber\\
& =\mathbb{E}\left[ \kappa_{n}^{2}\frac{(u_{g_{n}}^{\ast}(\varepsilon))^{2}%
}{2}\mp\kappa_{n}u_{g_{n}}^{\ast}(\varepsilon)u\pm\kappa_{n}u_{g_{n}}^{\ast
}(\varepsilon)(g(\varepsilon_{h})-g_{o}(\varepsilon))\right] \nonumber\\
& =\pm\kappa_{n}\mathbb{E}\left[ u_{g_{n}}^{\ast}(\varepsilon)(g(\varepsilon
_{h})-g_{o}(\varepsilon))\right] +O(\kappa_{n}^{2}). \label{P-L2-AP-12}%
\end{align}
By the second order expansion, $g(\varepsilon_{h})-g(\varepsilon)=\partial
g(\varepsilon)(\varepsilon_{h}-\varepsilon)+\frac{1}{2}\partial^{2}g(\widetilde
{\varepsilon}_{h})(\varepsilon_{h}-\varepsilon)^{2}$, where $\widetilde
{\varepsilon}_{h}\in\mathcal{E}_{\eta}$ for any $h\in\mathcal{N}_{h,n}$. For
any $g\left( \cdot\right) =P\left( \cdot\right) ^{\prime}\beta
\in\mathcal{N}_{g,n}$, we have%
\begin{equation}
\left\Vert \beta\right\Vert \leq\left\Vert \beta-\beta_{o,K}\right\Vert
+\left\Vert \beta_{o,K}\right\Vert \leq C\delta_{2,n}+\left\Vert \beta
_{o,K}\right\Vert \leq C \label{P-L2-AP-12A}%
\end{equation}
where the second inequality is by (\ref{P-L2-AP-4A}), Assumptions \ref{L-A-3}%
.(i)-(ii) and \ref{L-A-3}.(v), the third inequality is by
\begin{equation}
\left\Vert \beta_{o,K}\right\Vert \leq\omega_{\min}^{-1}(Q_{K})\left\Vert
g_{o,K}\right\Vert _{2}\leq\omega_{\min}^{-1}(Q_{K})\left[ \left\Vert
g_{o,K}-g_{o}\right\Vert _{2}+\left\Vert g_{o}\right\Vert _{2}\right] \leq C
\label{P-L2-AP-12B}%
\end{equation}
where the third inequality is by Assumptions \ref{L-A-1}.(ii)-(iv). Note that
for any $g(\cdot)=P(\cdot)^{\prime}\beta_{K}\in\mathcal{N}_{g,n}$, we have $\left\Vert
g-g_{o}\right\Vert _{2}\leq\delta_{2,n}$, which together with Assumption
\ref{L-A-1}.(iii) and the definition of $\delta_{2,n}$ implies that
\begin{equation}
\left\Vert g-g_{o,K}\right\Vert _{2}\leq\left\Vert g-g_{o}\right\Vert
_{2}+\left\Vert g_{o,K}-g_{o}\right\Vert _{2}\leq2\delta_{2,n}.
\label{P-L2-AP-12C}%
\end{equation}
By (\ref{P-L2-AP-12C}) and Assumption \ref{L-A-1}.(iv),%
\begin{equation}
\left\Vert \beta-\beta_{o,K}\right\Vert ^{2}\leq\omega_{\min}^{-1}%
(Q_{K})(\beta-\beta_{o,K})^{\prime}Q_{K}(\beta-\beta_{o,K})=\omega_{\min}%
^{-1}(Q_{K})\left\Vert g-g_{o,K}\right\Vert _{2}^{2}\leq C\delta_{2,n}^{2}.
\label{P-L2-AP-12D}%
\end{equation}
By (\ref{P-L2-AP-12A}), $\left\vert \partial^{2}g(\widetilde{\varepsilon}%
_{h})\right\vert \leq C\xi_{2,K}$, which together with (\ref{P-L2-AP-2}),
(\ref{P-AP-AL1-1}), (\ref{P-AP-AL1-3}) and (\ref{P-L2-AP-12D}) implies that%
\begin{align}
\mathbb{E}\left[ \left\vert \partial^{2}g(\widetilde{\varepsilon}%
_{h})(\varepsilon_{h}-\varepsilon)^{2}u_{g_{n}}^{\ast}(\varepsilon)\right\vert
\right] & \leq\mathbb{E}\left[ \left\vert \partial^{2}P(\widetilde
{\varepsilon}_{h})^{\prime}(\beta-\beta_{o,K})(\varepsilon_{h}-\varepsilon
)^{2}u_{g_{n}}^{\ast}(\varepsilon)\right\vert \right] \nonumber\\
& +\mathbb{E}\left[ \left\vert \partial^{2}P(\widetilde{\varepsilon}%
_{h})^{\prime}\beta_{o,K}(\varepsilon_{h}-\varepsilon)^{2}u_{g_{n}}^{\ast
}(\varepsilon)\right\vert \right] \nonumber\\
& \leq\left( \xi_{2,K}\left\Vert \beta-\beta_{o,K}\right\Vert +\upsilon
_{2,K}\right) \mathbb{E}\left[ \left\vert (\varepsilon_{h}-\varepsilon
)^{2}u_{g_{n}}^{\ast}(\varepsilon)\right\vert \right] \nonumber\\
& \leq\left( \xi_{2,K}\delta_{2,n}+\upsilon_{2,K}\right) \zeta_{L}%
\delta_{1,n}\mathbb{E}\left[ \left\vert (\varepsilon_{h}-\varepsilon
)u_{g_{n}}^{\ast}(\varepsilon)\right\vert \right] \nonumber\\
& \leq\left( \xi_{2,K}\delta_{2,n}+\upsilon_{2,K}\right) \zeta_{L}%
\delta_{1,n}^{2}=o(n^{-1/2}) \label{P-L2-AP-13}%
\end{align}
for any $g\in\mathcal{N}_{g,n}$ and $h\in\mathcal{N}_{h,n}$, where the
equality is by Assumptions \ref{L-A-3}.(iii)-(v). By (\ref{P-L2-AP-13}),
\begin{equation}
\mathbb{E}\left[ u_{g_{n}}^{\ast}(\varepsilon)(g(\varepsilon_{h}%
)-g(\varepsilon))\right] =\mathbb{E}\left[ u_{g_{n}}^{\ast
}(\varepsilon)\partial g(\varepsilon)(\varepsilon_{h}-\varepsilon)\right]
+o(n^{-1/2}). \label{P-L2-AP-14}%
\end{equation}
By Jensen's inequality, the H\"{o}lder inequality, (\ref{P-L2-AP-2}), Assumptions
\ref{L-A-0}.(iii), \ref{L-A-1}.(ii),\ \ref{L-A-3}.(v) and the definition of
$h_{o,n}$,
\begin{align}
\left\vert \mathbb{E}\left[ u_{g_{n}}^{\ast}(\varepsilon)\partial
g(\varepsilon)(\varepsilon_{h_{o,n}}-\varepsilon)\right] \right\vert &
=\left\vert \mathbb{E}\left[ u_{g_{n}}^{\ast}(\varepsilon)\partial
g(\varepsilon)(h_{o}-h_{o,n})\right] \right\vert \nonumber\\
& \leq C(\mathbb{E}\left[ (u_{g_{n}}^{\ast}(\varepsilon))^{2}\right]
\mathbb{E}\left[ (h_{o}-h_{o,n})^{2}\right] )^{1/2}\nonumber\\
& \leq C(\mathbb{E}\left[ (h_{o}-h_{o,n})^{2}\right] )^{1/2}=o(n^{-1/2}).
\label{P-L2-AP-14a}%
\end{align}
Combining the results in (\ref{P-L2-AP-12}), (\ref{P-L2-AP-14}) and
(\ref{P-L2-AP-14a}), we get%
\begin{equation}
K_{\psi}(g,h)-K_{\psi}(g^{\ast},h)=\mp\kappa_{n}\Gamma(\alpha_{o})\left[
h-h_{o,n},u_{g_{n}}^{\ast}\right] \pm\kappa_{n}\mathbb{E}\left[ u_{g_{n}%
}^{\ast}(\varepsilon)(g(\varepsilon)-g_{o}(\varepsilon))\right] +o(n^{-1}).
\label{P-L2-AP-15}%
\end{equation}
By the definition of $\left\Vert \cdot\right\Vert _{\psi}$ and
(\ref{P-L2-AP-2}),
\begin{equation}
\frac{||g^{\ast}-g_{o}||_{\psi}^{2}-||g-g_{o}||_{\psi}^{2}}{2}=\pm\kappa
_{n}\mathbb{E}\left[ u_{g_{n}}^{\ast}(\varepsilon)(g(\varepsilon
)-g_{o}(\varepsilon))\right] +o_{p}(n^{-1}) \label{P-L2-AP-16}%
\end{equation}
which together with (\ref{P-L2-AP-15}) verifies Assumption 3.2.(iii) in HLR.
\end{proof}
\bigskip
\begin{lemma}
\label{L-AP-3} Suppose that Assumptions \ref{L-A-0}, \ref{L-A-1}, \ref{L-A-2}
and \ref{L-A-3} hold. Then Assumption 3.3 in HLR holds.
\end{lemma}
\begin{proof}
[Proof of Lemma \ref{L-AP-3}]As the functional value $\rho(g_{o})$ only
depends on $g_{o}$, we know that $u_{h_{n}}^{\ast}=0$. By Assumption
\ref{L-A-0}.(i),%
\begin{equation}
\mathbb{E}\left[ (u_{\Gamma_{n}}^{\ast}(x))^{2}\right] \leq\frac{\left\Vert
v_{\Gamma_{n}}^{\ast}(x)\right\Vert _{2}^{2}}{\left\Vert v_{\Gamma_{n}}^{\ast
}(x)\right\Vert _{2}^{2}C^{-1}+\left\Vert v_{g_{n}}^{\ast}(\varepsilon
)u\right\Vert _{2}^{2}}\leq C, \label{P-L3-AP-1}%
\end{equation}
which together with the H\"{o}lder inequality and Assumption \ref{L-A-0}.(iii)
implies that\
\begin{equation}
\left\vert \langle h_{o,L}-h_{o},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi
}\right\vert \leq\left\Vert h_{o,L}-h_{o}\right\Vert _{2}\left\Vert
u_{\Gamma_{n}}^{\ast}\right\Vert _{2}=O(L^{-\rho_{h}}). \label{P-L3-AP-2}%
\end{equation}
By the definition of $\widehat{h}_{n}$,
\begin{equation}
\langle\widehat{h}_{n}-h_{o,L},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi
}=\mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)^{\prime}\right] \left(
R_{n}R_{n}^{\prime}\right) ^{-1}R_{n}(S_{n}-H_{n,L}), \label{P-L3-AP-3}%
\end{equation}
where $H_{n,L}=\left[ h_{o,L}(x_{1}),\ldots,h_{o,L}(x_{n})\right] ^{\prime}%
$. By the Cauchy-Schwarz inequality and the H\"{o}lder inequality, we have%
\begin{equation}
\left\Vert \mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)\right] \right\Vert
^{2}\leq\mathbb{E}\left[ (u_{\Gamma_{n}}^{\ast}(x))^{2}\right]
\mathbb{E}\left[ R(x)^{\prime}R(x)\right] \leq CL \label{P-L3-AP-6}%
\end{equation}
where the second inequality is by (\ref{P-L3-AP-1}) and Assumption
\ref{L-A-0}.(iv). Under Assumptions \ref{L-A-0} and \ref{L-A-3}.(i), we can
invoke Lemma 6.2 of Belloni, et al. (2015) to get%
\begin{equation}
\left\Vert Q_{L}-Q_{n,L}\right\Vert =O_{p}(\zeta_{L}(\log L)^{1/2}n^{-1/2}),
\label{P-L3-AP-7}%
\end{equation}
where $Q_{n,L}=n^{-1}R_{n}R_{n}^{\prime}$, which together with Assumption
\ref{L-A-3}.(i) implies that
\begin{equation}
(2C)^{-1}<\omega_{\min}(Q_{n,L})\leq\omega_{\max}(Q_{n,L})<2C\text{ wpa1.}
\label{P-L3-AP-8}%
\end{equation}
By the Cauchy-Schwarz inequality, (\ref{P-L3-AP-6}), (\ref{P-L3-AP-8}) and
Assumption \ref{L-A-0}.(iii)%
\begin{align}
& \left\vert \mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)^{\prime}\right]
\left( R_{n}R_{n}^{\prime}\right) ^{-1}R_{n}(H_{n}-H_{n,L})\right\vert
^{2}\nonumber\\
& \leq\left\Vert \mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)\right]
\right\Vert ^{2}(H_{n}-H_{n,L})^{\prime}R_{n}^{\prime}\left( R_{n}%
R_{n}^{\prime}\right) ^{-2}R_{n}(H_{n}-H_{n,L})\nonumber\\
& \leq O_{p}(Ln^{-1})(H_{n}-H_{n,L})^{\prime}(H_{n}-H_{n,L})=O_{p}%
(L^{1-2\rho_{h}}), \label{P-L3-AP-9}%
\end{align}
which together with $S_{n}-H_{n,L}=(H_{n}-H_{n,L})+e_{n}$ (where
$e_{n}=\left[ \varepsilon_{1},\ldots,\varepsilon_{n}\right] ^{\prime}$),
(\ref{P-L3-AP-3}) and Assumption \ref{L-A-3}.(v) implies that
\begin{equation}
\langle\widehat{h}_{n}-h_{o,L},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi
}=\mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)^{\prime}\right] \left(
R_{n}R_{n}^{\prime}\right) ^{-1}R_{n}e_{n}+o_{p}(n^{-1/2}).
\label{P-L3-AP-10}%
\end{equation}
By Assumptions \ref{L-A-0}.(i)-(ii) and \ref{L-A-0}.(iv), and (\ref{P-L3-AP-8}%
),%
\begin{align}
\mathbb{E}\left[ \left. \left\Vert n^{-1}Q_{L}^{-1}R_{n}e_{n}\right\Vert
^{2}\right\vert \{x_{i}\}_{i=1}^{n}\right] & =\mathbb{E}\left[ \left.
n^{-2}e_{n}^{\prime}R_{n}^{\prime}Q_{L}^{-2}R_{n}e_{n}\right\vert
\{x_{i}\}_{i=1}^{n}\right] \nonumber\\
& \leq n^{-2}\omega_{\min}^{-2}(Q_{L})tr\left( R_{n}^{\prime}\mathbb{E}%
\left[ \left. e_{n}e_{n}^{\prime}\right\vert \{x_{i}\}_{i=1}^{n}\right]
R_{n}\right) \nonumber\\
& \leq Cn^{-2}\omega_{\min}^{-2}(Q_{L})tr\left( R_{n}R_{n}^{\prime}\right)
\nonumber\\
& \leq Cn^{-1}\omega_{\min}^{-2}(Q_{L})tr\left( Q_{n,L}\right)
=O_{p}(Ln^{-1}) \label{P-L3-AP-11}%
\end{align}
which together with the Markov inequality implies that
\begin{equation}
\left\Vert n^{-1}Q_{L}^{-1}R_{n}e_{n}\right\Vert =O_{p}(L^{1/2}n^{-1/2}).
\label{P-L3-AP-12}%
\end{equation}
By the Cauchy-Schwarz inequality,%
\begin{align}
& \left\vert \mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)^{\prime}\right]
Q_{n,L}^{-1}\frac{R_{n}e_{n}}{n}-\mathbb{E}\left[ u_{\Gamma_{n}}^{\ast
}(x)R(x)^{\prime}\right] n^{-1}Q_{L}^{-1}R_{n}e_{n}\right\vert
\nonumber\\
& =\left\vert \mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)^{\prime}\right]
Q_{n,L}^{-1}\left( Q_{n,L}-Q_{L}\right) n^{-1}Q_{L}^{-1}R_{n}e_{n}%
\right\vert \nonumber\\
& \leq\left\Vert \mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)^{\prime
}\right] Q_{n,L}^{-1}\right\Vert \left\Vert Q_{n,L}-Q_{L}\right\Vert
\left\Vert n^{-1}Q_{L}^{-1}R_{n}e_{n}\right\Vert \nonumber\\
& =O_{p}(\zeta_{L}(\log L)^{1/2}Ln^{-1})=o_{p}(n^{-1/2}) \label{P-L3-AP-13}%
\end{align}
where the second equality is by (\ref{P-L3-AP-6}), (\ref{P-L3-AP-7}),
(\ref{P-L3-AP-8}) and (\ref{P-L3-AP-11}), the last equality is by Assumption
\ref{L-A-3}.(iv). Collecting the results in (\ref{P-L3-AP-10}) and
(\ref{P-L3-AP-13}), we get%
\begin{equation}
\langle\widehat{h}_{n}-h_{o,L},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi
}=\mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)^{\prime}\right] Q_{L}%
^{-1}\frac{R_{n}e_{n}}{n}+o_{p}(n^{-1/2}). \label{P-L3-AP-14}%
\end{equation}
By the definition of $u_{\Gamma_{n}}^{\ast}(x)$,
\begin{equation}
\mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)^{\prime}\right] Q_{L}%
^{-1}\frac{R_{n}e_{n}}{n}=\mathbb{E}\left[ \partial g_{o}(\varepsilon
)v_{g_{n}}^{\ast}(\varepsilon)R(x)^{\prime}\right] Q_{L}^{-1}\frac{R_{n}%
e_{n}}{n\left\Vert v_{n}^{\ast}\right\Vert _{sd}}, \label{P-L3-AP-15}%
\end{equation}
and moreover%
\begin{equation}
\Delta_{\varphi}(Z_{1,i},h_{o})[u_{\Gamma_{n}}^{\ast}]=\mathbb{E}\left[
\partial g_{o}(\varepsilon)v_{g_{n}}^{\ast}(\varepsilon)R(x)^{\prime}\right]
Q_{L}^{-1}\frac{R(x_{i})\varepsilon_{i}}{\left\Vert v_{n}^{\ast}\right\Vert
_{sd}}. \label{P-L3-AP-16}%
\end{equation}
Hence we have
\begin{equation}
\mu_{n}\left\{ \Delta_{\varphi}(Z_{1},h_{o})[u_{\Gamma_{n}}^{\ast}]\right\}
=\mathbb{E}\left[ u_{\Gamma_{n}}^{\ast}(x)R(x)^{\prime}\right] Q_{L}%
^{-1}\frac{R_{n}e_{n}}{n} \label{P-L3-AP-17}%
\end{equation}
which together with (\ref{P-L3-AP-2}), (\ref{P-L3-AP-14}) and Assumption
\ref{L-A-3}.(v)\ verifies Assumption 3.3.(i) in HLR.
By definition,
\begin{equation}
\Delta_{\varphi}(Z_{1},h_{o})[u_{\Gamma_{n}}^{\ast}]+\Delta_{\psi}(Z_{2}%
,g_{o},h_{o})[u_{g_{n}}^{\ast}]=\frac{v_{\Gamma_{n}}^{\ast}(x)\varepsilon
+v_{g_{n}}^{\ast}(\varepsilon)u}{\left\Vert v_{n}^{\ast}\right\Vert _{sd}}.
\label{P-L3-AP-18}%
\end{equation}
By the Cauchy-Schwarz inequality, Assumptions \ref{L-A-0}.(iv)-(v),
\ref{L-A-1}.(ii) and (\ref{P-LAP-1}),%
\begin{align}
\frac{\sup_{x\in\mathcal{X}}\left\vert v_{\Gamma_{n}}^{\ast}(x)\right\vert
^{2}}{\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2}} & \leq\frac{\zeta_{L}^{2}%
}{\omega_{\min}^{2}(Q_{L})}\frac{\left\Vert \mathbb{E}\left[ \partial
g_{o}(\varepsilon)v_{g_{n}}^{\ast}(\varepsilon)R(x)\right] \right\Vert ^{2}%
}{\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2}}\nonumber\\
& \leq\frac{C\zeta_{L}^{2}}{\omega_{\min}^{2}(Q_{L})}\frac{\mathbb{E}\left[
(\partial g_{o}(\varepsilon)v_{g_{n}}^{\ast}(\varepsilon))^{2}\right]
\mathbb{E}\left[ R(x)^{\prime}R(x)\right] }{\left\Vert v_{g_{n}}^{\ast
}\right\Vert _{2}^{2}}\nonumber\\
& \leq\frac{C\zeta_{L}^{2}\sup_{\varepsilon\in\mathcal{E}}(\partial
g_{o}(\varepsilon))^{2}}{\omega_{\min}^{2}(Q_{L})}\frac{\mathbb{E}\left[
(v_{g_{n}}^{\ast}(\varepsilon))^{2}\right] \mathbb{E}\left[ R(x)^{\prime
}R(x)\right] }{\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}^{2}}=O(L\zeta
_{L}^{2}). \label{P-L3-AP-20}%
\end{align}
By Assumptions \ref{L-A-0}.(ii), \ref{L-A-1}.(i), \ref{L-A-3}.(i) and
\ref{L-A-3}.(iv), and the results in (\ref{P-L2-AP-2}), (\ref{P-L2-AP-5B}),
(\ref{P-L3-AP-1}) and (\ref{P-L3-AP-20}),%
\begin{align}
\frac{\mathbb{E}\left[ \left\vert v_{\Gamma_{n}}^{\ast}(x)\varepsilon
+v_{g_{n}}^{\ast}(\varepsilon)u\right\vert ^{4}\right] }{n\left\Vert
v_{n}^{\ast}\right\Vert _{sd}^{4}} & \leq8\frac{\mathbb{E}\left[ \left\vert
v_{\Gamma_{n}}^{\ast}(x)\varepsilon\right\vert ^{4}\right] +\mathbb{E}\left[
\left\vert v_{g_{n}}^{\ast}(\varepsilon)u\right\vert ^{4}\right]
}{n\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{4}}\nonumber\\
& \leq C\frac{\mathbb{E}\left[ \left\vert v_{\Gamma_{n}}^{\ast
}(x)\right\vert ^{4}\right] +\mathbb{E}\left[ \left\vert v_{g_{n}}^{\ast
}(\varepsilon)\right\vert ^{4}\right] }{n\left\Vert v_{n}^{\ast}\right\Vert
_{sd}^{4}}\nonumber\\
& \leq Cn^{-1}(\xi_{0,K}^{2}+L\zeta_{L}^{2})\left( \mathbb{E}\left[
\left\vert u_{\Gamma_{n}}^{\ast}(x)\right\vert ^{2}\right] +\mathbb{E}\left[
\left\vert u_{g_{n}}^{\ast}(\varepsilon)\right\vert ^{2}\right] \right)
\nonumber\\
& =O(\xi_{0,K}^{2}n^{-1}+L\zeta_{L}^{2}n^{-1})=o(1), \label{P-L3-AP-22}%
\end{align}
which together with Assumption \ref{L-A-0}.(i) and the Lindeberg CLT
verifies Assumption 3.3.(ii) in HLR. The conditions $\varepsilon_{2,n}%
=O(\kappa_{n})$ and $\kappa_{n}\delta_{2,n}^{\ast-1}=o(1)$ in Assumption
3.3.(iii) of HLR hold by $\varepsilon_{2,n}=0$ and by $n^{-1/2}\delta
_{2,n}^{\ast-1}=O(1)$ respectively. Moreover $||u_{g_{n}}^{\ast}||_{\psi}%
^{2}\leq C$ by the definition of $\left\Vert \cdot\right\Vert _{\psi}$ and
(\ref{P-L2-AP-2}). This verifies Assumption 3.3.(iii) in HLR.
\end{proof}
\bigskip
Recall that $\mathcal{N}_{h,n}=\{h\in\mathcal{H}_{n}:\left\Vert h-h_{o}%
\right\Vert _{2}\leq\delta_{h,n}^{\ast}\log(\log(n))\}$ and $\mathcal{N}%
_{n}=\mathcal{N}_{h,n}\times\mathcal{N}_{g,n}$. In Section 4 of HLR, we define
$\mathcal{W}_{1,n}=\{h\in\mathcal{V}_{1,n}:\left\Vert h\right\Vert _{2}%
\leq1\}$ and $\mathcal{W}_{2,n}=\{g\in\mathcal{V}_{2,n}:\left\Vert
g\right\Vert _{2}\leq1\}$.
\begin{lemma}
\label{L-AP-4} Suppose that Assumptions \ref{L-A-0}, \ref{L-A-1}, \ref{L-A-2},
\ref{L-A-3} and \ref{L-A-4} hold. Then Assumptions 4.1 and 4.2 in HLR hold.
\end{lemma}
\begin{proof}
[Proof of Lemma \ref{L-AP-4}]Assumptions 4.1.(i) and 4.1.(ii) in HLR hold by
the definition of $\left\langle \cdot,\cdot\right\rangle _{\psi}$. By the
Cauchy-Schwarz inequality,
\begin{align}
\sup_{\alpha\in\mathcal{N}_{n}} & \sup_{v_{g_{1}},v_{g_{2}}\in
\mathcal{W}_{2,n}}\left\vert n^{-1}\sum_{i=1}^{n}r_{\psi}(Z_{2,i}%
,\alpha)[v_{g_{1}},v_{g_{2}}]-\mathbb{E}\left[ r_{\psi}(Z_{2},\alpha
_{o})[v_{g_{1}},v_{g_{2}}]\right] \right\vert \nonumber\\
& =\sup_{v_{g_{1}},v_{g_{2}}\in\mathcal{W}_{2,n}}\left\vert n^{-1}\sum
_{i=1}^{n}v_{g_{1}}(\varepsilon_{i})v_{g_{2}}(\varepsilon_{i})-\mathbb{E}%
\left[ v_{g_{1}}(\varepsilon)v_{g_{2}}(\varepsilon)\right] \right\vert
\nonumber\\
& \leq\left\Vert Q_{n,K}-Q_{K}\right\Vert =O_{p}(\xi_{0,K}(\log
K)^{1/2}n^{-1/2})=o_{p}(1) \label{P-L4-AP-1}%
\end{align}
where the second equality is by (\ref{P-AP-AL1-0}), the third equality is by
Assumption \ref{L-A-3}.(i). This means that Assumption 4.1.(iii) in HLR holds.
Assumption 4.1.(iv) in HLR is assumed in Assumption \ref{L-A-2}.(iv).
This\ verifies Assumption 4.1 in HLR.
Assumptions 4.2.(i) and 4.2.(ii) in HLR hold by the definition of
$\left\langle \cdot,\cdot\right\rangle _{\varphi}$. By the Cauchy-Schwarz
inequality,%
\begin{align}
\sup_{h\in\mathcal{N}_{h,n}} & \sup_{v_{h_{1}},v_{h_{2}}\in\mathcal{W}%
_{1,n}}\left\vert n^{-1}\sum_{i=1}^{n}r_{\varphi}(Z_{1,i},h)[v_{h_{1}%
},v_{h_{2}}]-\mathbb{E}\left[ r_{\varphi}(Z_{1},h_{o})[v_{h_{1}},v_{h_{2}%
}]\right] \right\vert \nonumber\\
& =\sup_{v_{h_{1}},v_{h_{2}}\in\mathcal{W}_{1,n}}\left\vert n^{-1}\sum
_{i=1}^{n}v_{h_{1}}(x_{i})v_{h_{2}}(x_{i})-\mathbb{E}\left[ v_{h_{1}%
}(x)v_{h_{2}}(x)\right] \right\vert \nonumber\\
& \leq\left\Vert Q_{n,L}-Q_{L}\right\Vert =O_{p}(\zeta_{L}(\log
L)^{1/2}n^{-1/2})=o_{p}(1) \label{P-L4-AP-2}%
\end{align}
where the second equality is by (\ref{P-L3-AP-7}), and the third equality is
by Assumption \ref{L-A-3}.(i). This means that Assumption 4.2.(iii) in HLR
holds. As $\partial\rho(\alpha)[v_{h}]=0$ for any $\alpha$ in this example,
Assumption 4.2.(iv) in HLR holds.
Under Assumptions \ref{L-A-3}.(v) and \ref{L-A-4},%
\begin{equation}
\xi_{1,K}\delta_{2,n}\leq\xi_{1,K}(K^{1/2}n^{-1/2}+K^{-\rho_{g}}%
+\upsilon_{1,K}L^{1/2}n^{-1/2})\log(\log(n))=o(1). \label{P-L4-AP-2A}%
\end{equation}
By definition, for any $\alpha\in\mathcal{N}_{n}$, we have
\begin{align}
& \Gamma_{n}(\alpha)\left[ v_{h},v_{g}\right] -\Gamma(\alpha_{o})\left[
v_{h},v_{g}\right] \nonumber\\
& =n^{-1}\sum_{i=1}^{n}\left[ \partial g(\varepsilon_{h,i})-\partial
g(\varepsilon_{i})\right] v_{h}(x_{i})v_{g}(\varepsilon_{i})\nonumber\\
& +n^{-1}\sum_{i=1}^{n}\left[ \partial g(\varepsilon_{i})-\partial
g_{o}(\varepsilon_{i})\right] v_{h}(x_{i})v_{g}(\varepsilon_{i})\nonumber\\
& +n^{-1}\sum_{i=1}^{n}\partial g_{o}(\varepsilon_{i})v_{h}(x_{i}%
)v_{g}(\varepsilon_{i})-\mathbb{E}\left[ \partial g_{o}(\varepsilon
)v_{h}(x)v_{g}(\varepsilon)\right] . \label{P-L4-AP-3}%
\end{align}
By the Cauchy-Schwarz inequality,
\begin{align}
& \sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{W}_{2,n}\text{ }%
}\left\vert n^{-1}\sum_{i=1}^{n}\left\vert v_{h}(x_{i})v_{g}(\varepsilon
_{i})\right\vert \right\vert ^{2}\nonumber\\
& \leq\sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{W}_{2,n}\text{
}}\left[ n^{-1}\sum_{i=1}^{n}\left\vert v_{h}(x_{i})\right\vert ^{2}\times
n^{-1}\sum_{i=1}^{n}\left\vert v_{g}(\varepsilon_{i})\right\vert ^{2}\right]
\nonumber\\
& \leq\left\Vert Q_{n,L}\right\Vert \left\Vert Q_{n,K}\right\Vert =O_{p}(1)
\label{P-L4-AP-4}%
\end{align}
where the equality is by Assumptions \ref{L-A-0}.(iv), \ref{L-A-1}.(iv), and
results in (\ref{P-AP-AL1-0}) and (\ref{P-L3-AP-7}). Recall that
$\mathcal{B}_{2,n}^{\ast}\equiv\{v\in\mathcal{V}_{2,n}:\left\Vert v-v_{g_{n}%
}^{\ast}\right\Vert _{\psi}\left\Vert v_{g_{n}}^{\ast}\right\Vert _{\psi}%
^{-1}\leq\delta_{v_{g},n}\}$, where $\delta_{v_{g},n}=o(1)$ is some positive
sequence such that $\widehat{v}_{g_{n}}^{\ast}\in\mathcal{B}_{2,n}^{\ast}$
wpa1. For any $v_{g}\in\mathcal{B}_{2,n}^{\ast}$, we have
\begin{equation}
\left\vert \left\Vert v_{g}\right\Vert _{2}\left\Vert v_{g_{n}}^{\ast
}\right\Vert _{2}^{-1}-1\right\vert \leq\left\Vert v_{g}-v_{g_{n}}^{\ast
}\right\Vert _{2}\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}^{-1}=o(1)
\label{P-L4-AP-6}%
\end{equation}
which implies that
\begin{equation}
\sup_{v_{g}\in\mathcal{B}_{2,n}^{\ast}}\left\Vert v_{g}\right\Vert
_{2}\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}^{-1}\leq2 \label{P-L4-AP-7}%
\end{equation}
for all large $n$. By (\ref{P-L4-AP-7}),\ the mean value expansion, the
triangle inequality and the Cauchy-Schwarz inequality,%
\begin{align}
& \sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{B}_{2,n}^{\ast
}\text{ }}\left\vert n^{-1}\sum_{i=1}^{n}\left[ \partial g(\varepsilon
_{h,i})-\partial g(\varepsilon_{i})\right] v_{h}(x_{i})v_{g}(\varepsilon
_{i})\right\vert \nonumber\\
& \leq2\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}\sup_{v_{h}\in
\mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{W}_{2,n}\text{ }}\left\vert
n^{-1}\sum_{i=1}^{n}\partial P(\widetilde{\varepsilon}_{h,i})^{\prime}%
(\beta-\beta_{o,K})(\varepsilon_{h,i}-\varepsilon_{i})v_{h}(x_{i}%
)v_{g}(\varepsilon_{i})\right\vert \nonumber\\
& +2\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}\sup_{v_{h}\in\mathcal{W}%
_{1,n}\text{,}v_{g}\in\mathcal{W}_{2,n}\text{ }}\left\vert n^{-1}\sum
_{i=1}^{n}\partial P(\widetilde{\varepsilon}_{h,i})^{\prime}\beta
_{o,K}(\varepsilon_{h,i}-\varepsilon_{i})v_{h}(x_{i})v_{g}(\varepsilon
_{i})\right\vert \nonumber\\
& \leq C\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}\left[ \xi_{1,K}%
\left\Vert \beta-\beta_{o,K}\right\Vert +\upsilon_{1,K}\right] \zeta
_{L}\delta_{1,n}\left( \sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}%
\in\mathcal{W}_{2,n}\text{ }}n^{-1}\sum_{i=1}^{n}\left\vert v_{h}(x_{i}%
)v_{g}(\varepsilon_{i})\right\vert \right) \nonumber\\
& \leq C\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}\left[ \xi_{1,K}%
\delta_{2,n}+\upsilon_{1,K}\right] \zeta_{L}\delta_{1,n}\left( \sup
_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{W}_{2,n}\text{ }}%
n^{-1}\sum_{i=1}^{n}\left\vert v_{h}(x_{i})v_{g}(\varepsilon_{i})\right\vert
\right) , \label{P-L4-AP-8}%
\end{align}
for any $h\in\mathcal{N}_{h,n}$ and any $g\in\mathcal{N}_{g,n}$, where the second inequality is
by (\ref{P-L2-AP-3A}), the third inequality is by (\ref{P-L2-AP-12D}%
). Equation (\ref{P-L4-AP-8}) together with Assumptions
\ref{L-A-4}.(iii)-(v), (\ref{P-L4-AP-2A}), and (\ref{P-L4-AP-4}) implies that%
\begin{align}
& \sup_{\alpha\in\mathcal{N}_{n}}\sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}%
v_{g}\in\mathcal{B}_{2,n}^{\ast}\text{ }}\left\vert n^{-1}\sum_{i=1}%
^{n}\left[ \partial g(\varepsilon_{h,i})-\partial g(\varepsilon_{i})\right]
v_{h}(x_{i})v_{g}(\varepsilon_{i})\right\vert \nonumber\\
& =O_{p}((\xi_{1,K}\delta_{2,n}+\upsilon_{1,K})\zeta_{L}\delta_{1,n}%
)\nonumber\\
& =O_{p}(n^{-1/2}L^{1/2}\zeta_{L}\xi_{1,K}(n^{-1/2}K^{1/2}+K^{-\rho_{g}%
}+\upsilon_{1,K}n^{-1/2}L^{1/2})+n^{-1/2}L^{1/2}\zeta_{L}\upsilon_{1,K}%
)=o_{p}(1). \label{P-L4-AP-9}%
\end{align}
By the triangle inequality, the Cauchy-Schwarz inequality, Assumptions
\ref{L-A-1}.(iii) and \ref{L-A-1}.(v),
\begin{align}
\sup_{\varepsilon\in\mathcal{E}}\left\vert \partial g(\varepsilon)-\partial
g_{o}(\varepsilon)\right\vert & \leq\sup_{\varepsilon\in\mathcal{E}%
}\left\vert \partial g(\varepsilon)-\partial g_{o,K}(\varepsilon)\right\vert
+\sup_{\varepsilon\in\mathcal{E}}\left\vert \partial g_{o,K}(\varepsilon
)-\partial g_{o}(\varepsilon)\right\vert \nonumber\\
& \leq\xi_{1,K}\left\Vert \beta-\beta_{o,K}\right\Vert +K^{-\rho_{g}},
\label{P-L4-AP-10}%
\end{align}
which together with the definition of $\mathcal{N}_{n}$ and (\ref{P-L2-AP-4A})
implies that
\begin{equation}
\sup_{g\in\mathcal{N}_{g,n}}\sup_{\varepsilon\in\mathcal{E}}\left\vert
\partial g(\varepsilon)-\partial g_{o}(\varepsilon)\right\vert \leq C\xi
_{1,K}\delta_{2,n}+K^{-\rho_{g}}=o(1) \label{P-L4-AP-11}%
\end{equation}
where the equality is by Assumption \ref{L-A-3}.(v) and (\ref{P-L4-AP-2A}).
Using (\ref{P-L4-AP-11}) and the triangle inequality%
\begin{align}
& \sup_{g\in\mathcal{N}_{g,n}}\sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}%
\in\mathcal{B}_{2,n}^{\ast}\text{ }}\left\vert n^{-1}\sum_{i=1}^{n}\left[
\partial g(\varepsilon_{i})-\partial g_{o}(\varepsilon_{i})\right]
v_{h}(x_{i})v_{g}(\varepsilon_{i})\right\vert \nonumber\\
& \leq\sup_{g\in\mathcal{N}_{g,n}}\sup_{\varepsilon\in\mathcal{E}}\left\vert
\partial g(\varepsilon)-\partial g_{o}(\varepsilon)\right\vert \times
\sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{B}_{2,n}^{\ast}\text{
}}n^{-1}\sum_{i=1}^{n}\left\vert v_{h}(x_{i})v_{g}(\varepsilon_{i})\right\vert
\nonumber\\
& \leq C\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}(\xi_{1,K}\delta
_{2,n}+K^{-\rho_{g}})\left( \sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}%
\in\mathcal{W}_{2,n}\text{ }}n^{-1}\sum_{i=1}^{n}\left\vert v_{h}(x_{i}%
)v_{g}(\varepsilon_{i})\right\vert \right) =o_{p}(1) \label{P-L4-AP-12}%
\end{align}
where the equality is by Assumption \ref{L-A-4}.(i) and (\ref{P-L4-AP-4}). By
Assumptions \ref{L-A-0}.(i), \ref{L-A-0}.(v), \ref{L-A-1}.(ii), \ref{L-A-1}%
.(iv) and the Cauchy-Schwarz inequality,
\begin{equation}
\mathbb{E}\left[ \left\Vert \mu_{n}\left\{ \partial g_{o}(\varepsilon
)R(x)P(\varepsilon)^{\prime}\right\} \right\Vert ^{2}\right] \leq
n^{-1}\mathbb{E}\left[ \left\vert \partial g_{o}(\varepsilon)\right\vert
^{2}\left\vert P(\varepsilon)^{\prime}R(x)\right\vert ^{2}\right] \leq
CK\zeta_{L}n^{-1}=o(1) \label{P-L4-AP-13}%
\end{equation}
where the equality is by Assumption \ref{L-A-3}.(i). By the Cauchy-Schwarz
inequality,
\begin{align}
& \sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{B}_{2,n}^{\ast}%
}\left\vert \mu_{n}\left\{ \partial g_{o}(\varepsilon)v_{h}(x)v_{g}%
(\varepsilon)\right\} \right\vert \nonumber\\
& \leq2\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}\sup_{v_{h}\in
\mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{W}_{2,n}\text{ }}\left\vert \mu
_{n}\left\{ \partial g_{o}(\varepsilon)v_{h}(x)v_{g}(\varepsilon)\right\}
\right\vert \nonumber\\
& \leq2\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}\left\Vert \mu_{n}\left\{
\partial g_{o}(\varepsilon)R(x)P(\varepsilon)^{\prime}\right\} \right\Vert
=o_{p}(1) \label{P-L4-AP-14}%
\end{align}
where the equality is by Assumption \ref{L-A-4}.(i), (\ref{P-L4-AP-13}) and
the Markov inequality. Collecting the results in (\ref{P-L4-AP-3}),
(\ref{P-L4-AP-9}), (\ref{P-L4-AP-12}) and (\ref{P-L4-AP-14}), we get%
\begin{equation}
\sup_{\alpha\in\mathcal{N}_{n}}\sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}%
\in\mathcal{B}_{2,n}^{\ast}}\left\vert \Gamma_{n}(\alpha)\left[ v_{h}%
,v_{g}\right] -\Gamma(\alpha_{o})\left[ v_{h},v_{g}\right] \right\vert
=o_{p}(1). \label{P-L4-AP-15}%
\end{equation}
By the H\"{o}lder inequality and Assumption \ref{L-A-1}.(ii)%
\begin{align}
& \sup_{v_{h}\in\mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{B}_{2,n}^{\ast}%
}\left\vert \Gamma(\alpha_{o})\left[ v_{h},v_{g}-v_{g_{n}}^{\ast}\right]
\right\vert \nonumber\\
& \leq C\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}\sup_{v_{h}\in
\mathcal{W}_{1,n}\text{,}v_{g}\in\mathcal{B}_{2,n}^{\ast}}\left[ \left\Vert
v_{h}\right\Vert _{2}\left\Vert v_{g}-v_{g_{n}}^{\ast}\right\Vert
_{2}\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}^{-1}\right] \nonumber\\
& \leq C\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}\left\Vert Q_{L}%
\right\Vert \sup_{v_{g}\in\mathcal{B}_{2,n}^{\ast}}\left\Vert v_{g}-v_{g_{n}%
}^{\ast}\right\Vert _{2}\left\Vert v_{g_{n}}^{\ast}\right\Vert _{2}^{-1}=o(1)
\label{P-L4-AP-16}%
\end{align}
where the equality is by Assumption \ref{L-A-4}.(i), Assumption \ref{L-A-0}%
.(iv) and the definition of $\mathcal{B}_{2,n}^{\ast}$. Combining the results
in (\ref{P-L4-AP-15}) and (\ref{P-L4-AP-16}), we verify Assumption 4.2.(v) in HLR.
\end{proof}
\bigskip
\begin{lemma}
\label{L-AP-5} Suppose that Assumptions \ref{L-A-0}, \ref{L-A-1}, \ref{L-A-2},
\ref{L-A-3} and \ref{L-A-4} hold. Then Lemma C.3 in HLR holds.
\end{lemma}
\begin{proof}
[Proof of Lemma \ref{L-AP-5}]By definition for any $h\in\mathcal{N}_{h,n}$,%
\begin{equation}
\Delta_{\varphi}^{2}(Z_{1},h)[v_{h}]=\varepsilon^{2}v_{h}^{2}(x)+(h(x)-h_{o}%
(x))^{2}v_{h}^{2}(x)-2\varepsilon v_{h}^{2}(x)(h(x)-h_{o}(x)).
\label{P-L5-AP-1}%
\end{equation}
By the definitions of $\mathcal{W}_{1,n}$ and the\ operator norm,%
\begin{equation}
\sup_{v_{h}\in\mathcal{W}_{1,n}}\left\vert \mu_{n}\left\{ \varepsilon
^{2}v_{h}^{2}(x)\right\} \right\vert \leq\left\Vert \mu_{n}\left\{
\varepsilon^{2}R(x)R(x)^{\prime}\right\} \right\Vert . \label{P-L5-AP-2}%
\end{equation}
By Assumption \ref{L-A-0} and the Cauchy-Schwarz inequality,%
\begin{equation}
\mathbb{E}\left[ \left\Vert \mu_{n}\left\{ \varepsilon^{2}R(x)R(x)^{\prime
}\right\} \right\Vert ^{2}\right] \leq n^{-1}\mathbb{E}\left[
\varepsilon^{4}\left\vert R(x)^{\prime}R(x)\right\vert ^{2}\right] \leq
L\zeta_{L}^{2}n^{-1} \label{P-L5-AP-3}%
\end{equation}
which together with (\ref{P-L5-AP-2}), the Markov inequality and Assumption
\ref{L-A-3}.(i) implies that
\begin{equation}
\sup_{v_{h}\in\mathcal{W}_{1,n}}\left\vert \mu_{n}\left\{ \varepsilon
^{2}v_{h}^{2}(x)\right\} \right\vert =o_{p}(1). \label{P-L5-AP-4}%
\end{equation}
By the definition of $\mathcal{N}_{h,n}$,
\begin{align}
& \sup_{h\in\mathcal{N}_{h,n}}\sup_{v_{h}\in\mathcal{W}_{1,n}}n^{-1}%
\sum_{i=1}^{n}(h(x_{i})-h_{o}(x_{i}))^{2}v_{h}^{2}(x_{i})\nonumber\\
& \leq\left( \sup_{h\in\mathcal{N}_{h,n}}\left\Vert h-h_{o}\right\Vert
_{\infty}^{2}\right) \left( \sup_{v_{h}\in\mathcal{W}_{1,n}}n^{-1}\sum
_{i=1}^{n}v_{h}^{2}(x_{i})\right) =O_{p}(\zeta_{L}^{2}\delta_{h,n}^{2}%
)=o_{p}(1) \label{P-L5-AP-5}%
\end{align}
where $\delta_{h,n}^{\ast2}=Ln^{-1}+L^{-2\rho_{h}}$, the first equality is by
(\ref{P-L2-AP-3A}) and%
\begin{equation}
\sup_{v_{h}\in\mathcal{W}_{1,n}}n^{-1}\sum_{i=1}^{n}v_{h}^{2}(x_{i})=O_{p}(1),
\label{P-L5-AP-6}%
\end{equation}
which follows by arguments in showing (\ref{P-L4-AP-4}), the second equality
is by Assumptions \ref{L-A-3}.(i) and \ref{L-A-3}.(v). By the Cauchy-Schwarz
inequality,%
\begin{align}
& \sup_{h\in\mathcal{N}_{h,n}}\sup_{v_{h}\in\mathcal{W}_{1,n}}\left\vert
n^{-1}\sum_{i=1}^{n}\varepsilon_{i}v_{h}^{2}(x_{i})(h(x_{i})-h_{o}%
(x_{i}))\right\vert ^{2}\nonumber\\
& \leq\left( \sup_{h\in\mathcal{N}_{h,n}}\left\Vert h-h_{o}\right\Vert
_{\infty}^{2}\right) \left( \sup_{v_{h}\in\mathcal{W}_{1,n}}n^{-1}\sum
_{i=1}^{n}\varepsilon_{i}^{2}v_{h}^{2}(x_{i})\right) \left( \sup_{v_{h}%
\in\mathcal{W}_{1,n}}n^{-1}\sum_{i=1}^{n}v_{h}^{2}(x_{i})\right) .
\label{P-L5-AP-7}%
\end{align}
By Assumptions \ref{L-A-0}.(ii) and \ref{L-A-0}.(iv),
\begin{equation}
\left\vert \sup_{v_{h}\in\mathcal{W}_{1,n}}\mathbb{E}\left[ \varepsilon
_{i}^{2}v_{h}^{2}(x_{i})\right] \right\vert \leq\left\Vert \mathbb{E}\left[
\varepsilon^{2}R(x)R(x)^{\prime}\right] \right\Vert \leq C \label{P-L5-AP-8}%
\end{equation}
which together with (\ref{P-L5-AP-4}) implies that
\begin{equation}
\sup_{v_{h}\in\mathcal{W}_{1,n}}n^{-1}\sum_{i=1}^{n}\varepsilon_{i}^{2}%
v_{h}^{2}(x_{i})=O_{p}(1). \label{P-L5-AP-9}%
\end{equation}
By (\ref{P-L5-AP-6}), (\ref{P-L5-AP-7}), (\ref{P-L5-AP-9}) and the definition
of $\mathcal{N}_{h,n}$,
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n}}\sup_{v_{h}\in\mathcal{W}_{1,n}}\left\vert
n^{-1}\sum_{i=1}^{n}\varepsilon_{i}v_{h}^{2}(x_{i})(h(x_{i})-h_{o}%
(x_{i}))\right\vert ^{2}=O_{p}(\zeta_{L}^{2}\delta_{h,n}^{2})=o_{p}(1)
\label{P-L5-AP-10}%
\end{equation}
where $\delta_{h,n}^{\ast2}=Ln^{-1}+L^{-2\rho_{h}}$, the second equality is by
Assumptions \ref{L-A-3}.(i) and \ref{L-A-3}.(v). Collecting the results in
(\ref{P-L5-AP-1}), (\ref{P-L5-AP-4}), (\ref{P-L5-AP-5}) and (\ref{P-L5-AP-10}%
), we show that Lemma C.3.(i) in HLR holds.
By definition%
\begin{align}
& n^{-1}\sum_{i=1}^{n}\Delta_{\psi}^{2}(Z_{2,i},\alpha)[v_{g}]-\mathbb{E}%
\left[ \Delta_{\psi}^{2}(Z_{2},\alpha_{o})[v_{g}]\right] \nonumber\\
& =\mu_{n}\left\{ u^{2}v_{g}^{2}(\varepsilon)\right\} +n^{-1}\sum_{i=1}%
^{n}(g(\varepsilon_{h,i})-g_{o}(\varepsilon_{i}))^{2}v_{g}^{2}(\varepsilon
_{i})\nonumber\\
& -2n^{-1}\sum_{i=1}^{n}u_{i}(g(\varepsilon_{h,i})-g_{o}(\varepsilon
_{i}))v_{g}^{2}(\varepsilon_{i}). \label{P-L5-AP-11}%
\end{align}
Using similar arguments in showing (\ref{P-L5-AP-4}), we can show that%
\begin{equation}
\sup_{v_{g}\in\mathcal{W}_{2,n}}\mu_{n}\left\{ u^{2}v_{g}^{2}(\varepsilon
)\right\} =O_{p}(K\xi_{0,K}n^{-1})=o_{p}(1), \label{P-L5-AP-12}%
\end{equation}
where the equality is by Assumption \ref{L-A-3}.(i). By (\ref{P-L2-AP-4}) and
(\ref{P-L2-AP-5}),%
\begin{align}
& \sup_{\alpha\in\mathcal{N}_{n}}\sup_{v_{g}\in\mathcal{W}_{2,n}}n^{-1}%
\sum_{i=1}^{n}(g(\varepsilon_{h,i})-g_{o}(\varepsilon_{i}))^{2}v_{g}%
^{2}(\varepsilon_{i})\nonumber\\
& \leq C(\zeta_{L}^{2}+\xi_{0,K}^{2})\delta_{2,n}^{2}\sup_{v_{g}%
\in\mathcal{W}_{2,n}}n^{-1}\sum_{i=1}^{n}v_{g}^{2}(\varepsilon_{i})\nonumber\\
& =o(1)\sup_{v_{g}\in\mathcal{W}_{2,n}}n^{-1}\sum_{i=1}^{n}v_{g}%
^{2}(\varepsilon_{i})=o_{p}(1) \label{P-L5-AP-13}%
\end{align}
where the first equality is by%
\begin{equation}
(\zeta_{L}^{2}+\xi_{0,K}^{2})\delta_{2,n}^{2}=o(1), \label{P-L5-AP-13A}%
\end{equation}
which is implied by Assumption \ref{L-A-3}.(i), \ref{L-A-3}.(v) and
$(\zeta_{L}^{2}+\xi_{0,K}^{2})\upsilon_{1,K}^{2}Ln^{-1}=o(1)$ (which is
implied by Assumption \ref{L-A-4}), the second equality in (\ref{P-L5-AP-13})
is by%
\begin{equation}
\sup_{v_{g}\in\mathcal{W}_{2,n}}n^{-1}\sum_{i=1}^{n}v_{g}^{2}(\varepsilon
_{i})=O_{p}(1) \label{P-L5-AP-14}%
\end{equation}
which follows by arguments in showing (\ref{P-L4-AP-4}). Similarly by
(\ref{P-L2-AP-4}) and (\ref{P-L2-AP-5}),%
\begin{align}
& \sup_{\alpha\in\mathcal{N}_{n}}\sup_{v_{g}\in\mathcal{W}_{2,n}}\left\vert
n^{-1}\sum_{i=1}^{n}u_{i}(g(\varepsilon_{h,i})-g_{o}(\varepsilon_{i}%
))v_{g}^{2}(\varepsilon_{i})\right\vert ^{2}\nonumber\\
& \leq C(\zeta_{L}^{2}+\xi_{0,K}^{2})\delta_{2,n}^{2}\sup_{v_{g}%
\in\mathcal{W}_{2,n}}n^{-1}\sum_{i=1}^{n}u_{i}^{2}v_{g}^{2}(\varepsilon
_{i})\sup_{v_{g}\in\mathcal{W}_{2,n}}n^{-1}\sum_{i=1}^{n}v_{g}^{2}%
(\varepsilon_{i})\nonumber\\
& =o_{p}(1)\sup_{v_{g}\in\mathcal{W}_{2,n}}n^{-1}\sum_{i=1}^{n}u_{i}^{2}%
v_{g}^{2}(\varepsilon_{i})=o_{p}(1) \label{P-L5-AP-15}%
\end{align}
where the first equality is by (\ref{P-L5-AP-13A}) and (\ref{P-L5-AP-14}), the
second equality is by
\begin{equation}
\sup_{v_{g}\in\mathcal{W}_{2,n}}n^{-1}\sum_{i=1}^{n}u_{i}^{2}v_{g}%
^{2}(\varepsilon_{i})=O_{p}(1) \label{P-L5-AP-16}%
\end{equation}
which follows by similar arguments in showing (\ref{P-L5-AP-9}). Collecting
the results in (\ref{P-L5-AP-11}), (\ref{P-L5-AP-12}), (\ref{P-L5-AP-13}) and
(\ref{P-L5-AP-15}), we show that Lemma C.3.(ii) in HLR holds.
By definition%
\begin{align}
& \Delta_{\varphi}(Z_{1},h)[v_{h}]\Delta_{\psi}(Z_{2},\alpha)[v_{g}%
]-\mathbb{E}_{Z}\left[ \Delta_{\varphi}(Z_{1},h_{o})[v_{h}]\Delta_{\psi
}(Z_{2},\alpha_{o})[v_{g}]\right] \nonumber\\
& =u\varepsilon v_{g}(\varepsilon)v_{h}(x)-\mathbb{E}\left[ u\varepsilon
v_{g}(\varepsilon)v_{h}(x)\right] \nonumber\\
& +(h(x)-h_{o}(x))uv_{h}(x)v_{g}(\varepsilon)+(g(\varepsilon_{h}%
)-g_{o}(\varepsilon))\varepsilon v_{h}(x)v_{g}(\varepsilon)\nonumber\\
& +(g(\varepsilon_{h})-g_{o}(\varepsilon))(h(x)-h_{o}(x))v_{h}(x)v_{g}%
(\varepsilon), \label{P-L5-AP-17}%
\end{align}
for any $\alpha\in\mathcal{N}_{n}$. By the Cauchy-Schwarz inequality and
Assumptions \ref{L-A-0}.(i)-(ii), \ref{L-A-0}.(v), \ref{L-A-1}.(i) and
\ref{L-A-1}.(v),%
\begin{align}
\mathbb{E}\left[ \left\Vert \mu_{n}\left\{ u\varepsilon R(x)P(\varepsilon
)^{\prime}\right\} \right\Vert ^{2}\right] & =n^{-1}\mathbb{E}\left[
u^{2}\varepsilon^{2}P(\varepsilon)^{\prime}P(\varepsilon)R(x)^{\prime
}R(x)\right] \nonumber\\
& \leq n^{-1}\sqrt{\mathbb{E}\left[ \left( u^{2}P(\varepsilon)^{\prime
}P(\varepsilon)\right) ^{2}\right] }\sqrt{\mathbb{E}\left[ \left(
\varepsilon^{2}R(x)^{\prime}R(x)\right) ^{2}\right] }\nonumber\\
& \leq n^{-1}\sqrt{\xi_{0,K}^{2}\mathbb{E}\left[ P(\varepsilon)^{\prime
}P(\varepsilon)\right] }\sqrt{\zeta_{L}^{2}\mathbb{E}\left[ R(x)^{\prime
}R(x)\right] }\nonumber\\
& \leq Cn^{-1}\zeta_{L}\xi_{0,K}L^{1/2}K^{1/2}\leq Cn^{-1}(L+K)(\zeta_{L}%
^{2}+\xi_{0,K}^{2})=o(1), \label{P-L5-AP-18}%
\end{align}
where the third inequality is by $\mathbb{E}\left[ P(\varepsilon)^{\prime
}P(\varepsilon)\right] \leq tr(Q_{K})=O(K)$ and $\mathbb{E}\left[
R(x)^{\prime}R(x)\right] \leq tr(Q_{L})=O(L)$, and the last equality is by
Assumption \ref{L-A-3}.(i). By the Cauchy-Schwarz inequality, the Markov
inequality and (\ref{P-L5-AP-18}), we have%
\begin{equation}
\sup_{v_{h}\in\mathcal{W}_{1,n},v_{g}\in\mathcal{W}_{2,n}}\mu_{n}\left\{
u\varepsilon v_{g}(\varepsilon)v_{h}(x)\right\} =o_{p}(1). \label{P-L5-AP-19}%
\end{equation}
By (\ref{P-L2-AP-4}), (\ref{P-L2-AP-5}) and the Cauchy-Schwarz inequality,%
\begin{align}
& \sup_{\alpha\in\mathcal{N}_{n}}\sup_{v_{h}\in\mathcal{W}_{1,n},v_{g}%
\in\mathcal{W}_{2,n}}\left\vert n^{-1}\sum_{i=1}^{n}(g(\varepsilon
_{h,i})-g_{o}(\varepsilon_{i}))\varepsilon_{i}v_{h}(x_{i})v_{g}(\varepsilon
_{i})\right\vert ^{2}\nonumber\\
& \leq C(\zeta_{L}^{2}+\xi_{0,K}^{2})\delta_{2,n}^{2}\left( \sup_{v_{h}%
\in\mathcal{W}_{1,n}}n^{-1}\sum_{i=1}^{n}\varepsilon_{i}^{2}v_{h}^{2}%
(x_{i})\right) \left( \sup_{v_{g}\in\mathcal{W}_{2,n}}n^{-1}\sum_{i=1}%
^{n}v_{g}^{2}(\varepsilon_{i})\right) =o_{p}(1) \label{P-L5-AP-20}%
\end{align}
where the equality is by (\ref{P-L5-AP-13A}), (\ref{P-L5-AP-9})\ and
(\ref{P-L5-AP-14}). By the Cauchy-Schwarz inequality,%
\begin{align}
& \sup_{h\in\mathcal{N}_{h,n}}\sup_{v_{h}\in\mathcal{W}_{1,n},v_{g}%
\in\mathcal{W}_{2,n}}\left\vert n^{-1}\sum_{i=1}^{n}(h(x_{i})-h_{o}%
(x_{i}))u_{i}v_{h}(x_{i})v_{g}(\varepsilon_{i})\right\vert ^{2}\nonumber\\
& \leq\sup_{h\in\mathcal{N}_{h,n}}\left\Vert h-h_{o}\right\Vert _{\infty}%
^{2}\left( \sup_{v_{h}\in\mathcal{W}_{1,n}}n^{-1}\sum_{i=1}^{n}v_{h}%
^{2}(x_{i})\right) \left( \sup_{v_{g}\in\mathcal{W}_{2,n}}n^{-1}\sum
_{i=1}^{n}u_{i}^{2}v_{g}^{2}(\varepsilon_{i})\right) =o_{p}(1) \label{P-L5-AP-21}%
\end{align}
where the equality is by (\ref{P-L5-AP-6}), (\ref{P-L5-AP-16}) and $\sup
_{h\in\mathcal{N}_{h,n}}\left\Vert h-h_{o}\right\Vert _{\infty}^{2}=\zeta
_{L}^{2}\delta_{1,n}^{2}=o(1)$ which is implied by Assumption \ref{L-A-3}.(i).
Similarly,
\begin{align}
& \sup_{\alpha\in\mathcal{N}_{n}}\sup_{v_{h}\in\mathcal{W}_{1,n},v_{g}%
\in\mathcal{W}_{2,n}}\left\vert n^{-1}\sum_{i=1}^{n}(h(x_{i})-h_{o}%
(x_{i}))(g(\varepsilon_{h,i})-g_{o}(\varepsilon_{i}))v_{h}(x_{i}%
)v_{g}(\varepsilon_{i})\right\vert ^{2}\nonumber\\
& \leq C(\zeta_{L}^{2}+\xi_{0,K}^{2})\delta_{2,n}^{2}\sup_{h\in
\mathcal{N}_{h,n}}\left\Vert h-h_{o}\right\Vert _{\infty}^{2}\nonumber\\
& \times\left( \sup_{v_{h}\in\mathcal{W}_{1,n}}n^{-1}\sum_{i=1}^{n}v_{h}%
^{2}(x_{i})\right) \left( \sup_{v_{g}\in\mathcal{W}_{2,n}}n^{-1}\sum
_{i=1}^{n}v_{g}^{2}(\varepsilon_{i})\right) \nonumber\\
& =O_{p}((\zeta_{L}^{2}+\xi_{0,K}^{2})\zeta_{L}^{2}\delta_{1,n}^{2}%
\delta_{2,n}^{2})=o_{p}(1) \label{P-L5-AP-22}%
\end{align}
where the first equality is by (\ref{P-L5-AP-6}), (\ref{P-L5-AP-14}) and
$\sup_{h\in\mathcal{N}_{h,n}}\left\Vert h-h_{o}\right\Vert _{\infty}^{2}%
=\zeta_{L}^{2}\delta_{1,n}^{2}$, the second equality is by (\ref{P-L5-AP-13A}%
), and $\zeta_{L}^{2}\delta_{1,n}^{2}=o(1)$ which is implied by Assumption
\ref{L-A-3}.(i). Collecting the results in (\ref{P-L5-AP-17}),
(\ref{P-L5-AP-19}), (\ref{P-L5-AP-20}), (\ref{P-L5-AP-21}) and
(\ref{P-L5-AP-22}), we show that Lemma C.3.(iii) in HLR holds.
\end{proof}
\bigskip
\begin{lemma}
\label{L-AP-6} Suppose that Assumptions \ref{L-A-0}, \ref{L-A-1}, \ref{L-A-2}
and \ref{L-A-3} hold. Then Assumption 4.3.(iv) in HLR holds.
\end{lemma}
\begin{proof}
[Proof of Lemma \ref{L-AP-6}]By the definition of $\Delta_{\varphi}%
(Z_{1},h_{o})[v_{h}]$, we have%
\begin{equation}
\sup_{v_{h}\in\mathcal{W}_{1,n}}\mathbb{E}\left[ \left\vert \Delta_{\varphi
}(Z_{1},h_{o})[v_{h}]\right\vert ^{2}\right] \leq\left\Vert \mathbb{E}\left[
\varepsilon^{2}R(x)R(x)^{\prime}\right] \right\Vert \leq C\left\Vert
Q_{L}\right\Vert \leq C \label{P-L6-AP-1}%
\end{equation}
where the second inequality is by Assumption \ref{L-A-0}.(ii), the third
inequality is by Assumption \ref{L-A-0}.(iv). Similarly,
\begin{equation}
\sup_{v_{g}\in\mathcal{W}_{2,n}}\mathbb{E}\left[ \left\vert \Delta_{\psi
}(Z_{2},\alpha_{o})[v_{g}]\right\vert ^{2}\right] \leq\left\Vert
\mathbb{E}\left[ u^{2}P(\varepsilon)P(\varepsilon)^{\prime}\right]
\right\Vert \leq C\left\Vert Q_{K}\right\Vert \leq C \label{P-L6-AP-2}%
\end{equation}
where the second inequality is by Assumption \ref{L-A-1}.(i), the third
inequality is by Assumption \ref{L-A-1}.(iv). By $v_{h_{n}}^{\ast}=0$,
(\ref{P-LAP-1}), (\ref{P-L2-AP-2}) and (\ref{P-L3-AP-1}),
\begin{equation}
(\left\Vert v_{h_{n}}^{\ast}\right\Vert _{\varphi}+\left\Vert v_{\Gamma_{n}%
}^{\ast}\right\Vert _{\varphi}+\left\Vert v_{g_{n}}^{\ast}\right\Vert _{\psi
})\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-1}=\left\Vert u_{\Gamma_{n}%
}^{\ast}\right\Vert _{2}+\left\Vert u_{g_{n}}^{\ast}\right\Vert _{2}\leq C,
\label{P-L6-AP-3}%
\end{equation}
which verifies (4.10) in HLR. By (\ref{P-L6-AP-1}), (\ref{P-L6-AP-2}) and
(\ref{P-L6-AP-3}), Assumption 4.3.(iv) in HLR is verified.
\end{proof}
\section{Verification of Assumptions 3.2 and\ 3.4 in Example 2.1 \label{SA-2}}
In this section, we use the nonparametric triangular simultaneous equation
model in Newey, Powell and Vella (1999) to illustrate the high-level
sufficient conditions for the asymptotic normality of the two-step sieve estimator.
The first step nonparametric estimation takes the following form:%
\begin{equation}
\widehat{h}_{n}=\arg\max_{h\in\mathcal{H}_{n}}-\frac{1}{2n}\sum\limits_{i=1}%
^{n}\left( x_{i}-h\left( w_{1,i}\right) \right) ^{2} \label{A_NPV_1}%
\end{equation}
where $\mathcal{H}_{n}=\{h:h\left( \cdot\right) =R(\cdot)^{\prime}\gamma$,
$\gamma\in\mathbb{R}^{L(n)}\}$. Let $R(w_{1,i})=\left[ r_{1}(w_{1,i}%
),\ldots,r_{L(n)}(w_{1,i})\right] ^{\prime}$\ for $i=1,\ldots,n$,
and\ $R_{n}=\left[ R(w_{1,1}),\ldots,R(w_{1,n})\right] $.\ The first step M
estimator $\widehat{h}_{n}$ has a closed form expression%
\begin{equation}
\widehat{h}_{n}(\cdot)=R(\cdot)^{\prime}\left( R_{n}R_{n}^{\prime}\right)
^{-1}R_{n}X_{n}=R(\cdot)^{\prime}\widehat{\gamma}_{n} \label{A_NPV_2}%
\end{equation}
where\ $X_{n}=\left[ x_{1},\ldots,x_{n}\right] ^{\prime}$. To define the
second step M estimation, let $P(w)=\left[ p_{1}(w),\ldots,p_{K(n)}%
(w)\right] ^{\prime}$ be a vector of approximating functions of
$w=(x,w_{2}^{\prime},u)^{\prime}$ such that each $p_{k}(w)$ depends on
$(x,w_{2})$ or on $u$, but not both. From the first step estimator, we
calculate $\widehat{u}_{i}=x_{i}-\widehat{h}_{n}(w_{1,i})$ for $i=1,\ldots,n$.
Let\ $P(\widehat{w}_{i})=[p_{1}(\widehat{w}_{i}),\ldots,p_{K(n)}(\widehat
{w}_{i})]^{\prime}$ and $\widehat{P}_{n}=[\widehat{\tau}_{1}P(\widehat{w}%
_{1}),\ldots,\widehat{\tau}_{n}P(\widehat{w}_{n})]^{\prime}$, where
$\widehat{w}_{i}=(x_{i},w_{2,i}^{\prime},\widehat{u}_{i})^{\prime}$ and
$\widehat{\tau}_{i}=\prod\nolimits_{j=1}^{d_{w_{2}}+2}I\{a_{j}\leq\widehat
{w}_{j,i}\leq b_{j}\}$\ for $i=1,\ldots,n$, where $d_{w_{2}}$ denotes the
dimension of $w_{2}$ and $\widehat{w}_{j,i}$ is the $j$-th component of
$\widehat{w}_{i}$ for $j=1,\ldots,d_{w_{2}}+2$. Let $g_{o}(w)=m_{o}%
(x,w_{2})+\lambda_{o}\left( u\right) $ and $\eta=y-m_{o}(x,w_{2}%
)-\lambda_{o}\left( u\right) $. By the definition of $\lambda_{o}\left(
u\right) $, and the conditional moment restrictions in (3) of HLR, we have%
\begin{equation}
\mathbb{E}[\eta|x,w_{1}]=0. \label{A_NPV_3}%
\end{equation}
Let $\mathcal{T}_{w}=\{w:\tau(w)=1\}$ where $\tau(w)=\prod\nolimits_{j=1}%
^{d_{w_{2}}+2}I\{a_{j}\leq w_{j}\leq b_{j}\}$ with $w_{j}$ the $j$-th component of $w$. The second step M estimator
(of $g_{o}$) is%
\begin{equation}
\widehat{g}_{n}=\arg\max_{g\in\mathcal{G}_{n}}-n^{-1}\sum\limits_{i=1}%
^{n}\widehat{\tau}_{i}(y_{i}-g(\widehat{w}_{i}))^{2} \label{A_NPV_4}%
\end{equation}
where $\mathcal{G}_{n}=\{g\left( \cdot\right) :g\left( \cdot\right)
=\tau(\cdot)P(\cdot)^{\prime}\beta$, $\beta\in\mathbb{R}^{K(n)}\}$. The second
step M estimator $\widehat{g}_{n}$ also has a closed form expression
\begin{equation}
\widehat{g}_{n}(w)=P(w)^{\prime}(\widehat{P}_{n}^{\prime}\widehat{P}_{n}%
)^{-1}\widehat{P}_{n}^{\prime}Y_{n}=P(w)^{\prime}\widehat{\beta}_{n}
\label{A_NPV_5}%
\end{equation}
for any $w\in\mathcal{T}_{w}$, where\ $Y_{n}=\left[ y_{1},\ldots
,y_{n}\right] ^{\prime}$. The plug-in estimator of $\rho(g_{o})$ is
$\rho(\widehat{g}_{n})$, where $\rho(\cdot)$ is a linear functional of $g$.
We next list the low level sufficient conditions for the asymptotic normality
of $\rho(\widehat{g}_{n})$. These assumptions are from Newey, Powell and Vella (1999).
\begin{assumption}
\label{AA-E-1} $\left\{ (y_{i},x_{i},w_{1,i})\right\} _{i=1}^{n}$ is i.i.d.,
$var(x|w_{1})$ and $var(y|x,w_{1})$ are bounded.
\end{assumption}
\begin{assumption}
\label{AA-E-2} $w_{1}$ is continuously distributed with density that is
bounded away from zero on its support, and the support of $w_{1}$ is a
Cartesian product of compact, connected intervals. Also $w$ is continuously
distributed and its density is bounded away from zero on $\mathcal{T}_{w}$,
and $\mathcal{T}_{w}$\ is contained in the interior of the support of $w$.
\end{assumption}
\begin{assumption}
\label{AA-E-3} $h_{o}\left( w_{1}\right) $ is continuously differentiable of
order $s_{1}$ on the support of $w_{1}$ and $m_{o}(x,w_{2})$ and $\lambda
_{o}(u)$ are Lipschitz and continuously differentiable of order $s$ on
$\mathcal{T}_{w}$.
\end{assumption}
In the rest of the section, we write $L$ and $K$ for $L(n)$ and $K(n)$
respectively for notational simplicity. Following Newey, Powell and Vella
(1999), we consider two types of approximating functions for $R(w_{1})$ and
$P(w)$:\ the power series and splines.
\begin{assumption}
\label{AA-E-4} Either (a) for power series, $(K^{3}+K^{2}L)(L^{1/2}%
n^{-1/2}+L^{-s_{1}/d_{w_{1}}})=o(1)$; or (b) for splines, $(K^{2}%
+KL)(L^{1/2}n^{-1/2}+L^{-s_{1}/d_{w_{1}}})=o(1)$.
\end{assumption}
By Assumption \ref{AA-E-3}, there exists $\gamma_{o,L}\in\mathbb{R}^{L}$ such
that%
\begin{equation}
\sup_{w_{1}\in\mathcal{W}_{1}}\left\vert h_{o,L}(w_{1})-h_{o}(w_{1}%
)\right\vert \leq CL^{-s_{1}/d_{w_{1}}}, \label{AA-AP-1}%
\end{equation}
where $h_{o,L}(w_{1})=R(w_{1})^{\prime}\gamma_{o,L}$,\ $\mathcal{W}_{1}$
denotes the support of $w_{1}$ and $d_{w_{1}}$ denotes the dimension of
$w_{1}$, and there exists $\beta_{o,K}\in\mathbb{R}^{K}$ such that%
\begin{equation}
\sup_{w\in\mathcal{T}_{w}}\left\vert g_{o,K}(w)-g_{o}(w)\right\vert \leq
CK^{-s/d} \label{AA-AP-2}%
\end{equation}
where $g_{o,K}(w)=P(w)^{\prime}\beta_{o,K}$ and $d$ denotes the dimension
of\ $\left( x,w_{2}^{\prime}\right) ^{\prime}$.
We next calculate the Riesz representors $v_{g_{n}}^{\ast}$ and $v_{\Gamma
_{n}}^{\ast}$. Let $Z_{1,i}=(x_{i},w_{1,i}^{\prime})^{\prime}$ and
$\varphi\left( Z_{1,i},h\right) =-\left( x_{i}-h\left( w_{1,i}\right)
\right) ^{2}/2$. By definition, $\left\langle v_{h_{1}},v_{h_{2}%
}\right\rangle _{\varphi}=\mathbb{E}\left[ v_{h_{1}}(w_{1})v_{h_{2}}%
(w_{1})\right] $ for any\ $v_{h_{1}},v_{h_{2}}\in\mathcal{V}_{1}$. Let
$Z_{2,i}=(y_{i},x_{i},w_{1,i}^{\prime})^{\prime}$, $u_{h,i}=x_{i}-h(w_{1,i})$
and $w_{h,i}=(x_{i},w_{2,i}^{\prime},u_{h,i})^{\prime}$. The criterion
function of the second step estimation is%
\[
\psi\left( Z_{2,i},g,h\right) =-\tau(w_{h,i})\left( y_{i}-m\left(
x_{i},w_{2,i}\right) -\lambda\left( x_{i}-h(w_{1,i})\right) \right)
^{2}/2.
\]
By definition, $\langle v_{g_{1}},v_{g_{2}}\rangle_{\psi}=\mathbb{E}\left[
\tau(w)v_{g_{1}}(w)v_{g_{2}}(w)\right] $\ for any $v_{g_{1}},v_{g_{2}}%
\in\mathcal{V}_{2}$. By some simple calculation, we get%
\[
v_{g_{n}}^{\ast}(\cdot)=\tau(\cdot)P(\cdot)^{\prime}Q_{K}^{-1}\rho(P_{K}),
\]
where $Q_{K}=\mathbb{E}\left[ \tau(w)P(w)P(w)^{\prime}\right] $ and
$\rho(P_{K})=\left[ \rho(p_{1}),\ldots,\rho(p_{K})\right] ^{\prime}$.
Moreover, by the conditional moment condition (\ref{A_NPV_3}), we have%
\[
\Gamma(\alpha_{o})\left[ v_{h},v_{g}\right] =\mathbb{E}\left[
\tau(w)\partial_{u}g_{o}(w)v_{h}(w_{1})v_{g}(w)\right]
\]
where $\partial_{u}g_{o}(w)=\partial g_{o}(w)/\partial u$, which implies that%
\[
v_{\Gamma_{n}}^{\ast}(\cdot)=R(\cdot)^{\prime}Q_{L}^{-1}\mathbb{E}\left[
\tau(w)\partial_{u}g_{o}(w)v_{g_{n}}^{\ast}(w)\right] =R(\cdot)^{\prime}%
Q_{L}^{-1}HQ_{K}^{-1}\rho(P_{K}),
\]
where $H=\mathbb{E}\left[ \tau(w)\partial_{u}g_{o}(w)R(w_{1})P(w)^{\prime
}\right] $ and\ $Q_{L}=\mathbb{E}\left[ R(w_{1})R(w_{1})^{\prime}\right]
$.\ Using the sieve Riesz representors $v_{g_{n}}^{\ast}$ and $v_{\Gamma_{n}%
}^{\ast}$, and the i.i.d. assumption, we have%
\begin{align}
\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2} & =\text{Var}\left[
n^{-\frac{1}{2}}\sum\nolimits_{i=1}^{n}\left( u_{i}v_{\Gamma_{n}}^{\ast
}(w_{1,i})+\eta_{i}\tau(w_{i})v_{g_{n}}^{\ast}(w_{i})\right) \right]
\nonumber\\
& =\mathbb{E}\left[ u^{2}(v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}\right]
+\mathbb{E}\left[ \eta^{2}\tau(w)(v_{g_{n}}^{\ast}(w))^{2}\right]
\label{V_n_EX2}%
\end{align}
where the second equality is by (\ref{A_NPV_3}). Let $\Sigma_{K}%
=\mathbb{E}\left[ \eta^{2}\tau(w)P(w)P(w)^{\prime}\right] $ and $\Sigma
_{L}=\mathbb{E}\left[ u^{2}R(w_{1})R(w_{1})^{\prime}\right] $. By the
explicit expressions of $v_{g_{n}}^{\ast}$ and $v_{\Gamma_{n}}^{\ast}$,%
\begin{align*}
\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2} & =\rho(P_{K})^{\prime}%
Q_{K}^{-1}H^{\prime}Q_{L}^{-1}\mathbb{E}\left[ u^{2}R(w_{1})R(w_{1})^{\prime
}\right] Q_{L}^{-1}HQ_{K}^{-1}\rho(P_{K})\\
& +\rho(P_{K})^{\prime}Q_{K}^{-1}\mathbb{E}\left[ \eta^{2}\tau
(w)P(w)P(w)^{\prime}\right] Q_{K}^{-1}\rho(P_{K})\\
& =\rho(P_{K})^{\prime}Q_{K}^{-1}\left[ \Sigma_{K}+H^{\prime}Q_{L}%
^{-1}\Sigma_{L}Q_{L}^{-1}H\right] Q_{K}^{-1}\rho(P_{K})
\end{align*}
which is the same as the variance-covariance matrix $V$ of the two-step
estimator defined on page 596 of Newey, Powell and Vella (1999).
\begin{assumption}
\label{AA-E-5} $\sigma^{2}(x,w_{1})=var(y|x,w_{1})$ is bounded away from zero,
$\mathbb{E}[\eta^{4}|x,w_{1}]$ is bounded, and $\mathbb{E}[u^{4}|x,w_{1}]$ is
bounded. Also $g_{o}(w)$ is twice continuously differentiable in $u$ with
bounded first and second derivatives.
\end{assumption}
\begin{assumption}
\label{AA-E-6} There exist $v_{g}^{\ast}(w)$ and $\beta_{v,K}$ such that
$\mathbb{E}[\tau(w)\left\vert v_{g}^{\ast}(w)\right\vert ^{2}]<\infty$,
$\rho(g_{o})=\mathbb{E}[\tau(w)v_{g}^{\ast}(w)g_{o}(w)]$, $\rho(p_{k}%
)=\mathbb{E}[\tau(w)v_{g}^{\ast}(w)p_{k}(w)]$ and $\mathbb{E}[\tau
(w)\left\vert v_{g}^{\ast}(w)-P(w)^{\prime}\beta_{v,K}\right\vert
^{2}]\rightarrow0$ as $K\rightarrow\infty$.
\end{assumption}
For any $d_{w}\times1$ vector $a$ of nonnegative integers, let $|a|=\sum
_{j=1}^{d_{w}}a_{j}$ and $\partial^{a}g(w)=\partial^{|a|}g(w)/\partial
w_{1}^{a_{1}}\cdots\partial w_{d_{w}}^{a_{d_{w}}}$. Let $\xi_{\delta,K}$ ($\delta=0,1$) and
$\zeta_{L}$ be nondecreasing sequences such that $\max_{|a|\leq\delta}%
\sup_{w\in\mathcal{T}_{w}}\left\Vert \partial^{a}P(w)\right\Vert \leq
\xi_{\delta,K}$ and $\sup_{w_{1}\in\mathcal{W}_{1}}\left\Vert R(w_{1}%
)\right\Vert \leq\zeta_{L}$ respectively. The following assumption is on the
numbers of generic approximating functions in the first step and second step estimations.\
\begin{assumption}
\label{AA-E-7} $n^{1/2}K^{-s/d}=o(1)$ and $n^{1/2}L^{-s_{1}/d_{w_{1}}}=o(1)$,
and%
\begin{equation}
\xi_{0,K}^{2}(L^{2}+K^{2})\log(n)n^{-1}+\xi_{0,K}^{2}\zeta_{L}^{2}L(\zeta
_{L}^{2}L+\xi_{0,K}^{2}K)n^{-1}+\xi_{1,K}^{2}LKn^{-1}=o(1). \label{AAE7-1}%
\end{equation}
\end{assumption}
When the power series are used in the two-step estimation, we have $\zeta
_{L}\leq CL$ and $\xi_{\delta,K}\leq CK^{1+2\delta}$ ($\delta=0,1$). Under the
conditions that $n^{1/2}K^{-s/d}=o(1)$ and $n^{1/2}L^{-s_{1}/d_{w_{1}}}=o(1)$,
the sufficient condition for (\ref{AAE7-1}) becomes%
\[
(K^{7}L+K^{5}L^{3}+K^{2}L^{6})n^{-1}=o(1)
\]
which is implied by Assumption 8 in Newey, Powell and Vella (1999). When the
splines are used in the two-step estimation, we have $\zeta_{L}\leq CL^{1/2}$
and $\xi_{\delta,K}\leq CK^{1/2+\delta}$ ($\delta=0,1$). Under the conditions
that $n^{1/2}K^{-s/d}=o(1)$ and $n^{1/2}L^{-s_{1}/d_{w_{1}}}=o(1)$, the
sufficient condition for (\ref{AAE7-1}) becomes%
\[
(K^{4}L+K^{3}L^{2}+KL^{4})n^{-1}=o(1)
\]
which is also implied by Assumption 8 in Newey, Powell and Vella (1999).
\begin{theorem}
\label{AT-E-1}Under Assumptions \ref{AA-E-1}-\ref{AA-E-7}, we have%
\begin{equation}
\frac{\sqrt{n}\left[ \rho(\widehat{g}_{n})-\rho(g_{o})\right] }{\left\Vert
v_{n}^{\ast}\right\Vert _{sd}}\rightarrow_{d}N(0,1). \label{AT-E1-1}%
\end{equation}
\end{theorem}
\begin{proof}
[Proof of Theorem \ref{AT-E-1}]Define $\delta_{h,n}=\delta_{h,n}^{\ast}%
\varrho_{n}$ and $\delta_{g,n}=\delta_{g,n}^{\ast}\varrho_{n}$ where
$\delta_{h,n}^{\ast}=L^{1/2}n^{-1/2}+L^{-s_{1}/d_{w_{1}}}$, $\delta
_{g,n}^{\ast}=K^{1/2}n^{-1/2}+K^{-s/d}+\delta_{h,n}^{\ast}$ and $\{
\varrho_{n}\}_{n}$ is a slowly divergent real positive sequence. Let
$\mathcal{N}_{\gamma,n}=\{ \gamma\in\mathbb{R}^{L}$: $||\gamma-\gamma
_{o,L}||\leq\delta_{h,n}\}$ and, similarly, $\mathcal{N}_{\beta,n}=\{ \beta\in\mathbb{R}^{K}%
$: $||\beta-\beta_{o,K}||\leq\delta_{g,n}\}$. By Lemma \ref{AL-E-1}.(b) and Lemma \ref{AL-E-1}%
.(d), we have $\widehat{\gamma}_{n}\in\mathcal{N}_{\gamma,n}$ and
$\widehat{\beta}_{n}\in\mathcal{N}_{\beta,n}$ wpa1. Define $\mathcal{N}%
_{h,n}=\{h\left( \cdot\right) =R\left( \cdot\right) ^{\prime}\gamma$:
$\gamma\in\mathcal{N}_{\gamma,n}\}$ and $\mathcal{N}_{g,n}=\{g\left(
\cdot\right) =P\left( \cdot\right) ^{\prime}\beta$: $\beta\in
\mathcal{N}_{\beta,n}\}$.\footnote{Let $\left\Vert h\right\Vert
_{2}=(\mathbb{E}\left[ h(w_{1})^{2}\right] )^{1/2}$ denote the $L_{2}$-norm
and $\left\Vert g\right\Vert _{2,\tau}=(\mathbb{E}\left[ \tau(w)g(w)^{2}%
\right] )^{1/2}$ denote the restricted $L_{2}$-norm. One may also define the
local neighborhoods of $h_{o}$ and $g_{o}$ as: $\mathcal{N}_{h,n}^{\prime
}=\{h\left( \cdot\right) =R\left( \cdot\right) ^{\prime}\gamma$:
$\left\Vert h-h_{o}\right\Vert _{2}\leq\delta_{h,L}\varrho_{n}^{\prime}\}$ and
$\mathcal{N}_{g,n}^{\prime}=\{g\left( \cdot\right) =P\left( \cdot\right)
^{\prime}\beta$: $\left\Vert g-g_{o}\right\Vert _{2,\tau}\leq\delta
_{g,L}\varrho_{n}^{\prime}\}$ respectively, where\ $\{ \varrho_{n}^{\prime
}\}_{n}$ is a slowly divergent real sequence. For any $h=R\left(
\cdot\right) ^{\prime}\gamma_{h}\in\mathcal{N}_{h,n}^{\prime}$, by the
triangle inequality,
\[
\left\Vert h-h_{o,L}\right\Vert _{2}\leq\left\Vert h-h_{o}\right\Vert _{2}+\left\Vert
h_{o,L}-h_{o}\right\Vert _{2}\leq2\delta_{h,L}\varrho_{n}^{\prime}%
\]
which implies that $||\gamma_{h}-\gamma_{o,L}||\leq2\omega_{\min}^{-1/2}%
(Q_{L})\delta_{h,L}\varrho_{n}^{\prime}$, where $\omega_{\min}(Q_{L})$ denotes
the smallest eigenvalue of $Q_{L}$ which is bounded away from zero by
Assumption \ref{AA-E-2}. Hence if we let $\varrho_{n}=2\omega_{\min}%
^{-1/2}(Q_{L})\varrho_{n}^{\prime}$, then $\gamma_{h}\in\mathcal{N}_{\gamma,n}$
which implies that $h\in\mathcal{N}_{h,n}$ and hence $\mathcal{N}%
_{h,n}^{\prime}\subset\mathcal{N}_{h,n}$. Similarly, we can appropriately
choose $\varrho_{n}$ such that $\mathcal{N}_{g,n}^{\prime}\subset
\mathcal{N}_{g,n}$. This means the high-level sufficient conditions verified
under $\mathcal{N}_{h,n}$ and/or $\mathcal{N}_{g,n}$ hold for their
counterparts under $\mathcal{N}_{h,n}^{\prime}$ and/or $\mathcal{N}%
_{g,n}^{\prime}$.}\ By Lemma \ref{AL-E-1}.(b) and Lemma \ref{AL-E-1}.(d), we
have $\widehat{h}_{n}\in\mathcal{N}_{h,n}$ and $\widehat{g}_{n}\in
\mathcal{N}_{g,n}$ wpa1. The proof of the theorem is divided into three steps.
\textbf{Step 1.} We verify Assumption 3.1 in HLR. By Assumptions \ref{AA-E-5}
and \ref{AA-E-6}, Lemma \ref{AL-E-0} implies that
\begin{equation}
\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2}\rightarrow\mathbb{E}\left[ \eta
^{2}\tau(w)(v_{g}^{\ast}(w))^{2}\right] +\mathbb{E}\left[ u^{2}(v_{\Gamma
}^{\ast}(w_{1}))^{2}\right] \label{P-AT-E1-1}%
\end{equation}
as $K\rightarrow\infty$ and $L\rightarrow\infty$, where $v_{\Gamma}^{\ast
}(w_{1})=\mathbb{E}\left[ \tau(w)v_{g}^{\ast}(w)\partial_{u}g_{o}%
(w)|w_{1}\right] $. The above limit is the same as the asymptotic variance
defined in (5.7) of Newey, Powell and Vella (1999). By Assumption
\ref{AA-E-5}, $\mathbb{E}\left[ \eta^{2}|x,w_{1}\right] >C_{\eta}$ where
$C_{\eta}$ is a finite positive constant. This means that
\begin{equation}
\mathbb{E}\left[ \eta^{2}\tau(w)(v_{g}^{\ast}(w))^{2}\right] \geq C_{\eta
}\mathbb{E}\left[ \tau(w)(v_{g}^{\ast}(w))^{2}\right] >0 \label{P-AT-E1-2}%
\end{equation}
where the last inequality is by the fact that $\rho(g_{o})$ is an unknown
value. If $\mathbb{E}\left[ \tau(w)(v_{g}^{\ast}(w))^{2}\right] =0$, we have
$\tau(w)(v_{g}^{\ast}(w))^{2}=0$ almost surely which together with (5.6) in
Newey, Powell and Vella (1999) implies that $\rho(g)=0$ for any $g\in
\mathcal{G}$, where $\mathcal{G}$ includes all additive functions satisfying
Assumptions \ref{AA-E-3} and \ref{AA-E-5}. In such a case, $\rho(g_{o})$ would
be known to equal zero. Combining the results in (\ref{P-AT-E1-1}) and
(\ref{P-AT-E1-2}) we have $\liminf_{n}\left\Vert v_{n}^{\ast}\right\Vert
_{sd}>0$, which verifies Assumption 3.1.(i). Because $\rho(\cdot)$ is a linear
functional and $\left\Vert v_{n}^{\ast}\right\Vert _{sd}$ is bounded away from
zero, Assumption 3.1.(ii) holds trivially. The strong norms $\left\Vert
\cdot\right\Vert _{\mathcal{H}}$ and $\left\Vert \cdot\right\Vert
_{\mathcal{G}}$ used to establish the convergence rate of $\widehat{h}_{n}$
and $\widehat{g}_{n}$ respectively are the $L_{2}$-norm $\left\Vert
h\right\Vert _{2}=(\mathbb{E}\left[ (h(w_{1}))^{2}\right] )^{1/2}$ and the
restricted $L_{2}$-norm $\left\Vert g\right\Vert _{2,\tau}=(\mathbb{E}\left[
\tau(w)(g(w))^{2}\right] )^{1/2}$ respectively (see footnote 1 for details).
By the definitions of $\left\Vert \cdot\right\Vert _{\varphi}$ and $\left\Vert
\cdot\right\Vert _{\psi}$,\ we can set $c_{\varphi}=1$ and $c_{\psi}=1$ such
that $\left\Vert v_{h}\right\Vert _{\varphi}\leq c_{\varphi}\left\Vert
v_{h}\right\Vert _{\mathcal{H}}$ and $\left\Vert v_{g}\right\Vert _{\psi}\leq
c_{\psi}\left\Vert v_{g}\right\Vert _{\mathcal{G}}$ for any $v_{h}%
\in\mathcal{V}_{1}$ and $v_{g}\in\mathcal{V}_{2}$. Under Assumptions
\ref{AA-E-1}-\ref{AA-E-4}, we can use Lemma 4.1 of Newey, Powell and Vella
(1999) to get%
\begin{equation}
\left\Vert \widehat{g}_{n}-g_{o}\right\Vert _{\mathcal{G}}=O_{p}(\delta
_{2,n}^{\ast}) \label{P-AT-E1-3}%
\end{equation}
where $\delta_{2,n}^{\ast}=K^{1/2}n^{-1/2}+K^{-s/d}+L^{1/2}n^{-1/2}%
+L^{-s_{1}/d_{w_{1}}}$. Let $g_{n}\left( \cdot\right) =g_{o,K}\left(
\cdot\right) $ where $g_{o,K}$ is defined in (\ref{AA-AP-2}). Then by
(\ref{AA-AP-2}) we have%
\begin{equation}
\left\Vert g_{n}-g_{o}\right\Vert _{\mathcal{G}}=\left\Vert g_{o,K}%
-g_{o}\right\Vert _{\mathcal{G}}\leq\sup_{w\in\mathcal{T}_{w}}\left\vert
g_{o,K}(w)-g_{o}(w)\right\vert =O(\delta_{2,n}^{\ast}), \label{P-AT-E1-4}%
\end{equation}
which finishes verification of Assumption 3.1.(iii). For Assumption 3.1.(iv),
as $\rho(g)$ is linear and it only depends on $g$, it is sufficient to show
that
\begin{equation}
\frac{1}{\left\Vert v_{n}^{\ast}\right\Vert _{sd}}\left\vert \rho
(g_{o,n}-g_{o})\right\vert =o(n^{-\frac{1}{2}}) \label{P-AT-E1-5}%
\end{equation}
where $g_{o,n}$ denotes the projection of $g_{o}$ on the finite dimensional
sieve space with respect to the restricted $L_{2}$-norm $\left\Vert
\cdot\right\Vert _{2,\tau}$.\ By (5.6) in Newey, Powell and Vella (1999),
\begin{align}
\left\vert \rho(g_{o,n}-g_{o})\right\vert ^{2} & =\left\vert \mathbb{E}%
\left[ \tau(w)v_{g}^{\ast}(w)(g_{o,n}(w)-g_{o}(w))\right] \right\vert
^{2}\nonumber\\
& \leq\mathbb{E}\left[ \tau(w)(v_{g}^{\ast}(w))^{2}\right] \mathbb{E}%
\left[ \tau(w)(g_{o,n}(w)-g_{o}(w))^{2}\right] \nonumber\\
& \leq\mathbb{E}\left[ \tau(w)(v_{g}^{\ast}(w))^{2}\right] \mathbb{E}%
\left[ \tau(w)(g_{o,K}(w)-g_{o}(w))^{2}\right] =O(K^{-2s/d})
\label{P-AT-E1-6}%
\end{align}
where the first inequality is by H\"{o}lder's inequality, the second
inequality is by the definition of $g_{o,n}$, the last equality is by
(\ref{AA-AP-2}) and Assumption \ref{AA-E-6}. By Assumption 3.1.(i) (which has
already been verified), (\ref{P-AT-E1-6}) and Assumption \ref{AA-E-7}, we
prove (\ref{P-AT-E1-5}) and hence Assumption 3.1.(iv).
\textbf{Step 2.} We verify Assumption 3.2 of HLR. Let $u_{h}=x-h(w_{1})$ and
$w_{h}=(x,w_{2}^{\prime},u_{h})^{\prime}$. By definition%
\begin{align}
& \psi(Z_{2},g^{\ast},h)-\psi(Z_{2},g,h)-\Delta_{\psi}(Z_{2},g,h)[\pm
\kappa_{n}u_{g_{n}}^{\ast}]\nonumber\\
& =-\frac{\tau(w_{h})(y-g(w_{h})\mp\kappa_{n}u_{g_{n}}^{\ast}(w))^{2}}%
{2}\nonumber\\
& +\frac{\tau(w_{h})(y-g(w_{h}))^{2}}{2}-\tau(w_{h})(y-g(w_{h}))(\pm
\kappa_{n}u_{g_{n}}^{\ast})\nonumber\\
& =-\frac{\kappa_{n}^{2}}{2}\tau(w_{h})(u_{g_{n}}^{\ast}(w))^{2},
\label{P-AT-E1-7}%
\end{align}
where $u_{g_{n}}^{\ast}(w)=v_{g_{n}}^{\ast}(w)/\left\Vert v_{n}^{\ast
}\right\Vert _{sd}$ and\ $\left\Vert v_{n}^{\ast}\right\Vert _{sd}$ is defined
in (\ref{V_n_EX2}). By the triangle inequality, Lemma \ref{AL-E-2}.(e)-(f) and
(\ref{P-AT-E1-7}),
\begin{align}
& \sup_{h\in\mathcal{N}_{h,n},g\in\mathcal{N}_{g,n}}\left\vert \mu
_{n}\left\{ \psi(Z_{2},g^{\ast},h)-\psi(Z_{2},g,h)-\Delta_{\psi}%
(Z_{2},g,h)[\pm\kappa_{n}u_{g_{n}}^{\ast}]\right\} \right\vert \nonumber\\
& \leq\frac{\kappa_{n}^{2}}{2}n^{-1}\sum_{i=1}^{n}(u_{g_{n}}^{\ast}%
(w_{i})^{2}+\mathbb{E}[u_{g_{n}}^{\ast}(w)^{2}])=O_{p}(\kappa_{n}^{2})
\label{P-AT-E1-8}%
\end{align}
which verifies the first condition (12) of Assumption 3.2.(i) in HLR. Instead
of verifying (13) of Assumption 3.2.(i) in HLR, we show that Assumption
3.4\ holds. Assumption 3.4.(i) is implied by Assumption \ref{AA-E-1}. Let
$\tau(Z_{1},h)=\tau(w_{h})$ and $\Delta_{\psi}^{\ast}(Z_{2},g,h)[u_{g_{n}%
}^{\ast}]=(y-g(w_{h}))u_{g_{n}}^{\ast}(w)$. By definition,
\begin{equation}
\Delta_{\psi}(Z_{2},g,h)[u_{g_{n}}^{\ast}]=\tau(w_{h})(y-g(w_{h}))u_{g_{n}%
}^{\ast}(w)=\tau(Z_{1},h)\Delta_{\psi}^{\ast}(Z_{2},g,h)[u_{g_{n}}^{\ast}].
\label{P-AT-E1-9}%
\end{equation}
Therefore equation (18) of HLR holds. By definition,\ $\tau(w_{h})$ and
$u_{g_{n}}^{\ast}(w)$ only depend on $(x,w_{1})$. By (\ref{A_NPV_3}),
\begin{equation}
\mathbb{E}\left[ \left. \Delta_{\psi}^{\ast}(Z_{2},g_{o},h_{o})[u_{g_{n}%
}^{\ast}]\right\vert Z_{1}\right] =\mathbb{E}\left[ \left. (y-g_{o}%
(w))u_{g_{n}}^{\ast}(w)\right\vert x,w_{1}\right] =u_{g_{n}}^{\ast
}(w)\mathbb{E}\left[ \left. \eta\right\vert x,w_{1}\right] =0
\label{P-AT-E1-10}%
\end{equation}
which verifies (19) of HLR. By (\ref{P-AT-E1-9}) and (\ref{P-AT-E1-10}) we
show that Assumption 3.4.(ii) of HLR holds. By definition,
\begin{equation}
\Delta_{\psi}(Z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}(Z_{2},g_{o}%
,h)[u_{g_{n}}^{\ast}]=\tau(w_{h})(g_{o}(w_{h})-g(w_{h}))u_{g_{n}}^{\ast}(w),
\label{P-AT-E1-11}%
\end{equation}
and
\begin{equation}
\tau(Z_{1},h)(\Delta_{\psi}^{\ast}(Z_{2},g_{o},h)[u_{g_{n}}^{\ast}%
]-\Delta_{\psi}^{\ast}(Z_{2},g_{o},h_{o})[u_{g_{n}}^{\ast}])=\tau(w_{h}%
)(g_{o}(w)-g_{o}(w_{h}))u_{g_{n}}^{\ast}(w). \label{P-AT-E1-12}%
\end{equation}
Hence Assumption 3.4.(iii) follows by Lemmas \ref{AL-E-5} and \ref{AL-E-6}. By
Assumption \ref{AA-E-5} and Lemma \ref{AL-E-2}.(f) we have for any $h$
\begin{align}
& (\tau(Z_{1},h)-\tau(Z_{1},h_{o}))^{2}\mathbb{E}\left[ \left.
(\Delta_{\psi}^{\ast}(Z_{2},g_{o},h_{o})[u_{g_{n}}^{\ast}])^{2}\right\vert
Z_{1}\right] \nonumber\\
& =(\tau(w_{h})-\tau(w))^{2}(u_{g_{n}}^{\ast}(w))^{2}\mathbb{E}\left[ \left.
\eta^{2}\right\vert Z_{1}\right] \leq C\xi_{0,K}^{2}(\tau(w_{h})-\tau
(w))^{2}, \label{P-AT-E1-13}%
\end{align}
which together with Lemma \ref{AL-E-2}.(d) implies that%
\begin{align}
& \sup_{h\in\mathcal{N}_{h,n}}n^{-1}\sum_{i=1}^{n}(\tau(Z_{1,i}%
,h)-\tau(Z_{1,i},h_{o}))^{2}\mathbb{E}\left[ \left. (\Delta_{\psi}^{\ast
}(Z_{2,i},g_{o},h_{o})[u_{g_{n}}^{\ast}])^{2}\right\vert Z_{1,i}\right]
\nonumber\\
& \leq C\xi_{0,K}^{2}\sup_{h\in\mathcal{N}_{h,n}}n^{-1}\sum_{i=1}^{n}%
(\tau(Z_{1,i},h)-\tau(Z_{1,i},h_{o}))^{2}=O_{p}(\xi_{0,K}^{2}\zeta_{L}%
\delta_{h,n}) \label{P-AT-E1-14}%
\end{align}
where $\xi_{0,K}^{2}\zeta_{L}\delta_{h,n}=o(1)$ by Assumption
\ref{AA-E-7}. This proves Assumption 3.4.(iv) and hence finishes verification
of Assumption 3.4.
We next verify Assumption 3.2.(ii) of HLR. By definition,
\begin{equation}
\psi(Z_{2},g,h)-\psi(Z_{2},g^{\ast},h)=\tau(w_{h})(y-g(w_{h}))(\mp\kappa
_{n}u_{g_{n}}^{\ast})+\frac{\kappa_{n}^{2}}{2}\tau(w_{h})(u_{g_{n}}^{\ast
}(w))^{2}, \label{P-AT-E1-15}%
\end{equation}
which together with Lemma \ref{AL-E-2}.(e) and the definition of $K_{\psi
}(g,h)$ implies that
\begin{equation}
K_{\psi}(g,h)-K_{\psi}(g^{\ast},h)=\mp\kappa_{n}\mathbb{E}\left[ \tau
(w_{h})(y-g(w_{h}))u_{g_{n}}^{\ast}(w)\right] +O(\kappa_{n}^{2}).
\label{P-AT-E1-16}%
\end{equation}
By (\ref{A_NPV_3}),
\begin{equation}
\mathbb{E}\left[ (\tau(w_{h})-\tau(w))(y-g_{o}(w))u_{g_{n}}^{\ast}(w)\right]
=0 \label{P-AT-E1-17}%
\end{equation}
which implies that
\begin{align}
& \mathbb{E}\left[ \tau(w_{h})(y-g(w_{h}))u_{g_{n}}^{\ast}(w)\right]
\nonumber\\
& =\mathbb{E}\left[ \tau(w_{h})(g_{o}(w)-g_{o}(w_{h}))u_{g_{n}}^{\ast
}(w)\right] \nonumber\\
& +\mathbb{E}\left[ \tau(w_{h})(g_{o}(w_{h})-g(w_{h}))u_{g_{n}}^{\ast
}(w)\right] . \label{P-AT-E1-18}%
\end{align}
Using the second order expansion in (\ref{AL-E5-1}),%
\begin{align}
& \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mathbb{E}\left[ \tau(w_{h}%
)(g_{o}(w)-g_{o}(w_{h})-\partial_{u}g_{o}(w)(h(w_{1})-h_{o}(w_{1})))u_{g_{n}%
}^{\ast}(w)\right] \right\vert \nonumber\\
& \leq C\sup_{w}|u_{g_{n}}^{\ast}(w)|\sup_{h\in\mathcal{N}_{h,n}}%
\mathbb{E}\left[ (h(w)-h_{o}(w))^{2}\right] \leq C\xi_{0,K}\delta_{h,n}%
^{2}=o_{p}(n^{-1/2}), \label{P-AT-E1-19}%
\end{align}
where the second inequality is by Lemma \ref{AL-E-2}.(b) and \ref{AL-E-2}.(f),
the equality is by Assumption \ref{AA-E-7}. By Assumption \ref{AA-E-5},
(\ref{AA-AP-1}), Lemma \ref{AL-E-1}.(c) and \ref{AL-E-2}.(g) and the
definition of $\mathcal{N}_{h,n}$,
\begin{align}
& \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mathbb{E}\left[ (\tau(w_{h}%
)-\tau(w))\partial_{u}g_{o}(w)(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast
}(w)\right] \right\vert \nonumber\\
& \leq C\sup_{w}\left\vert u_{g_{n}}^{\ast}(w)\right\vert \left( \sup
_{h\in\mathcal{N}_{h,n}}\sup_{w_{1}}\left\vert h(w_{1})-h_{o}(w_{1}%
)\right\vert \right) \sup_{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ \left\vert
\tau(w_{h})-\tau(w)\right\vert \right] \nonumber\\
& \leq C\xi_{0,K}\zeta_{L}\delta_{h,n}\left( \zeta_{L}\sup_{\gamma
\in\mathcal{N}_{\gamma,n}}\left\Vert \gamma-\gamma_{o,L}\right\Vert
+CL^{-s_{1}/d_{w_{1}}}\right) \nonumber\\
& \leq C\xi_{0,K}\zeta_{L}^{2}\delta_{h,n}^{2}=o_{p}(n^{-1/2}),
\label{P-AT-E1-20}%
\end{align}
where the equality is by Assumption \ref{AA-E-7}. By (\ref{P-AT-E1-19}),
(\ref{P-AT-E1-20}) and the triangle inequality,%
\begin{equation}
\mathbb{E}\left[ \tau(w_{h})(g_{o}(w)-g_{o}(w_{h}))u_{g_{n}}^{\ast
}(w)\right] =\mathbb{E}\left[ \tau(w)\partial_{u}g_{o}(w)(h(w_{1}%
)-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w)\right] +o_{p}(n^{-1/2}),
\label{P-AT-E1-21}%
\end{equation}
uniformly over $(h,g)\in\mathcal{N}_{n}$. By (\ref{AL-E6-2}) in the proof of
Lemma \ref{AL-E-6},
\begin{align}
& \sup_{h\in\mathcal{N}_{h,n},g\in\mathcal{N}_{g,n}}\mathbb{E}\left[
\left\vert \tau(w_{h})(g_{o}(w_{h})-g(w_{h})-g_{o}(w)+g(w))u_{g_{n}}^{\ast
}(w)\right\vert \right] \nonumber\\
& \leq\xi_{1,K}\sup_{\beta\in\mathcal{N}_{\beta,n}}\left\Vert \beta
-\beta_{o,K}\right\Vert \sup_{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[
\left\vert u_{g_{n}}^{\ast}(w)(h(w_{1})-h_{o}(w_{1}))\right\vert \right]
\nonumber\\
& \leq\xi_{1,K}\delta_{g,n}\left\Vert u_{g_{n}}^{\ast}\right\Vert _{2}%
\sup_{h\in\mathcal{N}_{h,n}}\left\Vert h-h_{o}\right\Vert _{2}\nonumber\\
& \leq\xi_{1,K}\delta_{g,n}\delta_{h,n}=o_{p}(n^{-1/2}), \label{P-AT-E1-22}%
\end{align}
where the second inequality is by H\"{o}lder's inequality and the definition
of $\mathcal{N}_{\beta,n}$, the third inequality is by Lemma \ref{AL-E-2}.(e)
and the definition of $\mathcal{N}_{h,n}$, the equality is by Assumption
\ref{AA-E-7}. Similarly by (\ref{AA-AP-2}), Lemma \ref{AL-E-2}.(b),
\ref{AL-E-2}.(c) and \ref{AL-E-2}.(g),
\begin{align}
& \sup_{h\in\mathcal{N}_{h,n},g\in\mathcal{N}_{g,n}}\mathbb{E}\left[
\left\vert (\tau(w_{h})-\tau(w))(g_{o}(w)-g(w))u_{g_{n}}^{\ast}(w)\right\vert
\right] \nonumber\\
& \leq\sup_{w}\left\vert u_{g_{n}}^{\ast}(w)\right\vert \sup_{g\in
\mathcal{N}_{g,n}}\sup_{w}\left\vert g(w)-g_{o}(w)\right\vert \sup
_{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ \left\vert \tau(w_{h})-\tau
(w)\right\vert \right] \nonumber\\
& \leq C\xi_{0,K}\zeta_{L}\delta_{h,n}\sup_{\beta\in\mathcal{N}_{\beta,n}%
}\left[ \xi_{0,K}\left\Vert \beta-\beta_{o,K}\right\Vert +CK^{-s/d}\right]
\nonumber\\
& \leq C\xi_{0,K}^{2}\zeta_{L}\delta_{g,n}\delta_{h,n}=o_{p}(n^{-1/2}),
\label{P-AT-E1-23}%
\end{align}
where the equality is by Assumption \ref{AA-E-7}. By (\ref{P-AT-E1-22}),
(\ref{P-AT-E1-23}) and the triangle inequality,%
\begin{equation}
\mathbb{E}\left[ \tau(w_{h})(g_{o}(w_{h})-g(w_{h}))u_{g_{n}}^{\ast
}(w)\right] =\mathbb{E}\left[ \tau(w)(g_{o}(w)-g(w))u_{g_{n}}^{\ast
}(w)\right] +o_{p}(n^{-1/2}), \label{P-AT-E1-24}%
\end{equation}
uniformly over $(h,g)\in\mathcal{N}_{n}$. Collecting the results in
(\ref{P-AT-E1-16}), (\ref{P-AT-E1-18}), (\ref{P-AT-E1-21}) and
(\ref{P-AT-E1-24}), we deduce that%
\begin{align}
K_{\psi}(g,h)-K_{\psi}(g^{\ast},h) & =\mathbb{E}\left[ \tau(w_{h}%
)(y-g(w_{h}))(\mp\kappa_{n}u_{g_{n}}^{\ast}(w))\right] \nonumber\\
& =\mathbb{E}\left[ \tau(w)\partial_{u}g_{o}(w)(h(w_{1})-h_{o}(w_{1}%
))(\mp\kappa_{n}u_{g_{n}}^{\ast}(w))\right] \nonumber\\
& +\mathbb{E}\left[ \tau(w)(g_{o}(w)-g(w))(\mp\kappa_{n}u_{g_{n}}^{\ast
}(w))\right] +o_{p}(n^{-1/2}) \label{P-AT-E1-25}%
\end{align}
uniformly over $(h,g)\in\mathcal{N}_{n}$. By definition,%
\begin{equation}
\Gamma(\alpha_{o})\left[ h-h_{o},u_{g_{n}}^{\ast}\right] =\mathbb{E}\left[
\tau(w)\partial_{u}g_{o}(w)(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w)\right]
, \label{P-AT-E1-26}%
\end{equation}
for any $h\in\mathcal{N}_{h,n}$.\ By Jensen's inequality, (\ref{AA-AP-1}),
Assumptions \ref{AA-E-5}, \ref{AA-E-7} and the definition of $h_{o,n}$,%
\begin{align}
\left\vert \Gamma(\alpha_{o})\left[ h_{o,n}-h_{o},u_{g_{n}}^{\ast}\right]
\right\vert & \leq\mathbb{E}\left[ \left\vert \tau(w)\partial_{u}%
g_{o}(w)(h_{o,n}(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w)\right\vert \right]
\nonumber\\
& \leq C(\mathbb{E}[\left\vert h_{o,n}(w_{1})-h_{o}(w_{1})\right\vert ^{2}%
])^{1/2}=o(n^{-1/2}). \label{P-AT-E1-26a}%
\end{align}
Moreover,
\begin{align}
& \frac{||g^{\ast}-g_{o}||_{\psi}^{2}-||g-g_{o}||_{\psi}^{2}}{2}\nonumber\\
& =\mathbb{E}\left[ (g(w)-g_{o}(w))(\mp\kappa_{n}u_{g_{n}}^{\ast
}(w))\right] +\frac{\kappa_{n}^{2}}{2}\mathbb{E}\left[ u_{g_{n}}^{\ast
}(w)^{2}\right] \nonumber\\
& =\mathbb{E}\left[ (g(w)-g_{o}(w))(\mp\kappa_{n}u_{g_{n}}^{\ast
}(w))\right] +O_{p}(\kappa_{n}^{2}) \label{P-AT-E1-27}%
\end{align}
uniformly over $g\in\mathcal{N}_{g,n}$, where the second equality is by Lemma
\ref{AL-E-2}.(e). Collecting the results in (\ref{P-AT-E1-25}),
(\ref{P-AT-E1-26}), (\ref{P-AT-E1-26a}) and (\ref{P-AT-E1-27}) proves
Assumption 3.2(ii).
\textbf{Step 3.} We verify Assumption 3.3 of HLR. As $\rho(g)$ does not depend
on $h$, we only need to show that
\begin{equation}
\left\vert \langle\widehat{h}_{n}-h_{o},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi
}-\mu_{n}\left\{ \Delta_{\varphi}(Z_{1},h_{o})[u_{\Gamma_{n}}^{\ast
}]\right\} \right\vert =O_{p}(\kappa_{n}). \label{P-AT-E1-28}%
\end{equation}
By definition%
\begin{equation}
\langle\widehat{h}_{n}-h_{o},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi}%
=\langle\widehat{h}_{n}-h_{o,L},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi}+\langle
h_{o,L}-h_{o},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi}. \label{P-AT-E1-29}%
\end{equation}
By H\"{o}lder's inequality, (\ref{AA-AP-1}), Lemma \ref{AL-E-2}.(h) and
Assumption \ref{AA-E-7},%
\begin{equation}
\left\vert \langle h_{o,L}-h_{o},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi
}\right\vert \leq\left\Vert u_{\Gamma_{n}}^{\ast}\right\Vert \left\Vert
h_{o,L}-h_{o}\right\Vert =o(n^{-1/2}). \label{P-AT-E1-30}%
\end{equation}
By definition,
\begin{equation}
\widehat{h}_{n}(w_{1})-h_{o,L}(w_{1})=R(w_{1})^{\prime}\left( R_{n}%
R_{n}^{\prime}\right) ^{-1}R_{n}(H_{n}-H_{L,n})+R(w_{1})^{\prime}\left(
R_{n}R_{n}^{\prime}\right) ^{-1}R_{n}U_{n} \label{P-AT-E1-31}%
\end{equation}
where $H_{n}=(h_{o}(w_{1,1}),\ldots,h_{o}(w_{1,n}))^{\prime}$, $H_{L,n}%
=(h_{o,L}(w_{1,1}),\ldots,h_{o,L}(w_{1,n}))^{\prime}$ and $U_{n}=(u_{1}%
,\ldots,u_{n})^{\prime}$. By definition%
\begin{align}
\langle\widehat{h}_{n}-h_{o,L},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi} &
=\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-1}\rho(P_{K})^{\prime}Q_{K}%
^{-1}H^{\prime}(R_{n}R_{n}^{\prime})^{-1}R_{n}U_{n}\nonumber\\
& +\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-1}\rho(P_{K})^{\prime}%
Q_{K}^{-1}H^{\prime}(R_{n}R_{n}^{\prime})^{-1}R_{n}(H_{n}-H_{L,n}).
\label{P-AT-E1-32}%
\end{align}
By the Cauchy-Schwarz inequality,
\begin{align}
& \left\vert \left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-1}\rho(P_{K}%
)^{\prime}Q_{K}^{-1}H^{\prime}(R_{n}R_{n}^{\prime})^{-1}R_{n}(H_{n}%
-H_{L,n})\right\vert ^{2}\nonumber\\
& =\left\vert n^{-1}\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-1}\rho
(P_{K})^{\prime}Q_{K}^{-1}H^{\prime}(\widehat{Q}_{n,L})^{-1}R_{n}%
(H_{n}-H_{L,n})\right\vert ^{2}\nonumber\\
& \leq\frac{\rho(P_{K})^{\prime}Q_{K}^{-1}H^{\prime}(\widehat{Q}_{n,L}%
)^{-1}HQ_{K}^{-1}\rho(P_{K})}{\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2}%
}\nonumber\\
& \times\frac{(H_{n}-H_{L,n})^{\prime}R_{n}^{\prime}(R_{n}R_{n}^{\prime
})^{-1}R_{n}(H_{n}-H_{L,n})}{n}\nonumber\\
& \leq\omega_{\min}^{-1}(\widehat{Q}_{n,L})\omega_{\max}(Q_{L})\sup_{w_{1}%
}\left\vert h_{o}(w_{1})-h_{o,L}(w_{1})\right\vert ^{2}\frac{\rho(P_{K})^{\prime
}Q_{K}^{-1}H^{\prime}Q_{L}^{-1}HQ_{K}^{-1}\rho(P_{K})}{\left\Vert v_{n}^{\ast
}\right\Vert _{sd}^{2}}\nonumber\\
& \leq C\omega_{\min}^{-1}(\widehat{Q}_{n,L})\omega_{\max}(Q_{L}%
)L^{-2s_{1}/d_{w_{1}}}\frac{\mathbb{E}\left[ \left\vert v_{\Gamma_{n}}^{\ast
}(w)\right\vert ^{2}\right] }{\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2}}%
=o_{p}(n^{-1}) \label{P-AT-E1-33}%
\end{align}
where $\widehat{Q}_{n,L}=n^{-1}R_{n}R_{n}^{\prime}$,\ the second inequality is
by the fact that $R_{n}^{\prime}(R_{n}R_{n}^{\prime})^{-1}R_{n}$ is an
idempotent matrix, the third inequality is by (\ref{AA-AP-1}) and the
definition of $v_{\Gamma_{n}}^{\ast}$, the last equality is by Lemma
\ref{AL-E-1}.(a), \ref{AL-E-2}.(h) and Assumption \ref{AA-E-7}.\ Hence we
have
\begin{equation}
\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-1}\rho(P_{K})^{\prime}Q_{K}%
^{-1}H^{\prime}(R_{n}R_{n}^{\prime})^{-1}R_{n}(H_{n}-H_{L,n})=o_{p}(n^{-1/2}).
\label{P-AT-E1-34}%
\end{equation}
By the i.i.d. assumption, Assumption \ref{AA-E-5} and Lemma \ref{AL-E-1}.(a),
\begin{equation}
\mathbb{E}\left[ \left. \left\Vert n^{-1}Q_{L}^{-1}R_{n}U_{n}\right\Vert
^{2}\right\vert \{w_{1,i}\}_{i=1}^{n}\right] \leq\mathbb{E}\left[ \left.
u^{2}\right\vert w_{1}\right] n^{-1}tr(Q_{L}^{-1}\widehat{Q}_{n,L}%
)=O_{p}(n^{-1}), \label{P-AT-E1-35}%
\end{equation}
which together with the Markov inequality implies that
\begin{equation}
\left\Vert n^{-1}Q_{L}^{-1}R_{n}U_{n}\right\Vert =O_{p}(n^{-1/2}).
\label{P-AT-E1-36}%
\end{equation}
By the definition of $v_{\Gamma_{n}}^{\ast}$, Assumption \ref{AA-E-5}, Lemma
\ref{AL-E-1}.(a) and \ref{AL-E-2}.(h),
\begin{align}
& \left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-2}\rho(P_{K})^{\prime}Q_{K}%
^{-1}H^{\prime}(\widehat{Q}_{n,L})^{-2}HQ_{K}^{-1}\rho(P_{K})\nonumber\\
& \leq\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-2}\omega_{\min}%
^{-2}(\widehat{Q}_{n,L})\omega_{\max}(Q_{L})\rho(P_{K})^{\prime}Q_{K}%
^{-1}H^{\prime}Q_{L}^{-1}HQ_{K}^{-1}\rho(P_{K})\nonumber\\
& \leq\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-2}\omega_{\min}%
^{-2}(\widehat{Q}_{n,L})\omega_{\max}(Q_{L})\mathbb{E}\left[ (v_{\Gamma_{n}%
}^{\ast}(w_{1}))^{2}\right] =O_{p}(1). \label{P-AT-E1-37}%
\end{align}
By Lemma \ref{AL-E-1}.(a), (\ref{P-AT-E1-36}), (\ref{P-AT-E1-37}) and the
Cauchy-Schwarz inequality%
\begin{align}
& \left\vert (n\left\Vert v_{n}^{\ast}\right\Vert _{sd})^{-1}\rho
(P_{K})^{\prime}Q_{K}^{-1}H^{\prime}((\widehat{Q}_{n,L})^{-1}-Q_{L}^{-1}%
)R_{n}U_{n}\right\vert \nonumber\\
& =\left\vert (n\left\Vert v_{n}^{\ast}\right\Vert _{sd})^{-1}\rho
(P_{K})^{\prime}Q_{K}^{-1}H^{\prime}(\widehat{Q}_{n,L})^{-1}(\widehat{Q}%
_{n,L}-Q_{L})Q_{L}^{-1}R_{n}U_{n}\right\vert \nonumber\\
& \leq\left\Vert \left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-1}\rho
(P_{K})^{\prime}Q_{K}^{-1}H^{\prime}(\widehat{Q}_{n,L})^{-1}\right\Vert
\left\Vert \widehat{Q}_{n,L}-Q_{L}\right\Vert \left\Vert n^{-1}Q_{L}^{-1}%
R_{n}U_{n}\right\Vert \nonumber\\
& =O_{p}(\zeta_{L}L^{1/2}n^{-1})=o_{p}(n^{-1/2}) \label{P-AT-E1-38}%
\end{align}
where the last equality is by Assumption \ref{AA-E-7}. By (\ref{P-AT-E1-29}),
(\ref{P-AT-E1-30}), (\ref{P-AT-E1-32}), (\ref{P-AT-E1-33}) and
(\ref{P-AT-E1-38}),
\begin{align}
\langle\widehat{h}_{n}-h_{o},u_{\Gamma_{n}}^{\ast}\rangle_{\varphi} &
=(n\left\Vert v_{n}^{\ast}\right\Vert _{sd})^{-1}\rho(P_{K})^{\prime}%
Q_{K}^{-1}H^{\prime}Q_{L}^{-1}R_{n}U_{n}+o_{p}(n^{-1/2})\nonumber\\
& =(n\left\Vert v_{n}^{\ast}\right\Vert _{sd})^{-1}\sum_{i=1}^{n}%
v_{\Gamma_{n}}^{\ast}(w_{1,i})u_{i}+o_{p}(n^{-1/2})\nonumber\\
& =\mu_{n}\left\{ \Delta_{\varphi}(Z_{1},h_{o})[u_{\Gamma_{n}}^{\ast
}]\right\} +o_{p}(n^{-1/2}) \label{P-AT-E1-39}%
\end{align}
where the second equality is by the definition of $v_{\Gamma_{n}}^{\ast}$, and
the third equality is by the definition of $\Delta_{\varphi}(Z_{1}%
,h_{o})[u_{\Gamma_{n}}^{\ast}]$. This verifies Assumption 3.3.(i) in HLR. To
verify Assumption 3.3.(ii) in HLR, we notice that
\begin{align}
& n^{-\frac{1}{2}}\sum_{i=1}^{n}\left\{ \Delta_{\varphi}(Z_{1,i}%
,h_{o})[u_{\Gamma_{n}}^{\ast}]+\Delta_{\psi}(Z_{2,i},g_{o},h_{o})[u_{g_{n}%
}^{\ast}]\right\} \nonumber\\
& =n^{-\frac{1}{2}}\sum_{i=1}^{n}\left\{ u_{\Gamma_{n}}^{\ast}(w_{1,i}%
)u_{i}+u_{g_{n}}^{\ast}(w_{i})\eta_{i}\right\} . \label{P-AT-E1-40}%
\end{align}
To show the asymptotic normality of the above partial sum, we apply the
Lindeberg-Feller CLT. By the Cauchy-Schwarz inequality, Assumption
\ref{AA-E-5}, Lemma \ref{AL-E-2}.(h)
\begin{align}
\frac{\sup_{w_{1}\in\mathcal{W}_{1}}\left\vert v_{\Gamma_{n}}^{\ast}%
(w_{1})\right\vert ^{2}}{\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2}} &
\leq\zeta_{L}^{2}\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{-2}\left\Vert
Q_{L}^{-1}HQ_{K}^{-1}\rho(P_{K})\right\Vert ^{2}\nonumber\\
& \leq\omega_{\min}^{-1}(Q_{L})\zeta_{L}^{2}\left\Vert v_{n}^{\ast
}\right\Vert _{sd}^{-2}\rho(P_{K})^{\prime}Q_{K}^{-1}H^{\prime}Q_{L}%
^{-1}HQ_{K}^{-1}\rho(P_{K})\nonumber\\
& =\frac{C\zeta_{L}^{2}}{\omega_{\min}(Q_{L})}\frac{\mathbb{E}\left[
(v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}\right] }{\left\Vert v_{g_{n}}^{\ast
}\right\Vert _{2}^{2}}=O(\zeta_{L}^{2}), \label{P-AT-E1-41}%
\end{align}
where the first equality is by the definition of $v_{\Gamma_{n}}^{\ast}$. By
Assumption \ref{AA-E-5}, (\ref{P-AT-E1-41}), Lemma \ref{AL-E-2}%
.(f)-\ref{AL-E-2}.(h),
\begin{align}
& \frac{\mathbb{E}\left[ (v_{\Gamma_{n}}^{\ast}(w_{1})u+v_{g_{n}}^{\ast
}(w)\eta)^{4}\right] }{n\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{4}%
}\nonumber\\
& \leq8\frac{\mathbb{E}\left[ (v_{\Gamma_{n}}^{\ast}(w_{1})u)^{4}\right]
+\mathbb{E}\left[ (v_{g_{n}}^{\ast}(w)\eta)^{4}\right] }{n\left\Vert
v_{n}^{\ast}\right\Vert _{sd}^{4}}\nonumber\\
& \leq8C\frac{\mathbb{E}\left[ (v_{\Gamma_{n}}^{\ast}(w_{1}))^{4}\right]
+\mathbb{E}\left[ (v_{g_{n}}^{\ast}(w))^{4}\right] }{n\left\Vert v_{n}%
^{\ast}\right\Vert _{sd}^{4}}\nonumber\\
& \leq8C\frac{\sup_{w_{1}}(v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}+\sup
_{w}(v_{g_{n}}^{\ast}(w))^{2}}{n\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2}%
}\frac{\mathbb{E}\left[ (v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}\right]
+\mathbb{E}\left[ (v_{g_{n}}^{\ast}(w))^{2}\right] }{\left\Vert v_{n}^{\ast
}\right\Vert _{sd}^{2}}\nonumber\\
& =O((\zeta_{L}^{2}+\xi_{0,K}^{2})n^{-1})=o(1) \label{P-AT-E1-42}%
\end{align}
where the equality is by Assumption \ref{AA-E-7}. This verifies
Lindeberg's condition. Hence Assumption 3.3.(ii) in HLR follows by the i.i.d.
assumption and the Lindeberg-Feller CLT.{} Finally, we verify Assumption
3.3.(iii) in HLR. First, we have $\varepsilon_{2,n}=0$ because the estimators
in both the first step and the second step have closed form expressions. By
definition, $\delta_{2,n}^{\ast}=K^{1/2}n^{-1/2}+K^{-s/d}+L^{1/2}%
n^{-1/2}+L^{-s_{1}/d_{w_{1}}}$ which together with $K\rightarrow\infty$\ and
$L\rightarrow\infty$ implies that $n^{-1/2}(\delta_{2,n}^{\ast})^{-1}=o(1)$.
Moreover by Lemma \ref{AL-E-2}.(e), $||u_{g_{n}}^{\ast}||_{\psi}%
=(\mathbb{E}\left[ (v_{g_{n}}^{\ast}(w))^{2}\right] )^{1/2}\left\Vert
v_{n}^{\ast}\right\Vert _{sd}^{-1}=O(1)$ which finishes verification of
Assumption 3.3.(iii) in HLR.
\end{proof}
\bigskip\
\begin{corollary}
\label{C-AL-E-1} Under Assumptions \ref{AA-E-1}-\ref{AA-E-7}, Assumptions
\ref{L-SA-4}-\ref{L-SA-6} hold.
\end{corollary}
\begin{proof}
[Proof of Corollary \ref{C-AL-E-1}]We first verify Assumption \ref{L-SA-4}. By
definition,
\begin{equation}
\sup_{z_{1}\in\mathcal{Z}_{1},h\in\mathcal{N}_{h,n}}\left[ \left\vert \tau
(z_{1},h)\right\vert +\left\vert \tau(z_{1},h_{o})\right\vert \right] \leq2
\label{P-CAL-E1-1}%
\end{equation}
which shows that Assumption \ref{L-SA-4}.(i) holds. By definition, $\psi
^{\ast}(z_{2},\alpha)=-\frac{1}{2}(y-g(w_{h}))^{2}$, which implies that
\begin{equation}
\Delta_{\psi}^{\ast}(z_{2},\alpha)[v_{g,1}]=(y-g(w_{h}))v_{g,1}
\label{P-CAL-E1-2}%
\end{equation}
and
\begin{equation}
r_{\psi,g}^{\ast}(z_{2},\alpha)[v_{g,1},v_{g,2}]=-v_{g,1}v_{g,2}
\label{P-CAL-E1-3}%
\end{equation}
for any $v_{g,1}$, $v_{g,2}\in\mathcal{V}_{2}$, which implies that Assumption
\ref{L-SA-1}.(i) holds. Moreover%
\begin{align}
& \psi(z_{2},g^{\ast},h)-\psi(z_{2},\alpha)-\Delta_{\psi}(z_{2},\alpha
)[\pm\kappa_{n}u_{g_{n}}^{\ast}]-\kappa_{n}^{2}r_{\psi,g}(z_{2},\alpha
)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]\nonumber\\
& =\tau(z_{1},h)\left[
\begin{array}
[c]{c}%
\psi^{\ast}(z_{2},g^{\ast},h)-\psi^{\ast}(z_{2},\alpha)\\
-\Delta_{\psi}^{\ast}(z_{2},\alpha)[\pm\kappa_{n}u_{g_{n}}^{\ast}]-\kappa
_{n}^{2}r_{\psi,g}^{\ast}(z_{2},\alpha)[u_{g_{n}}^{\ast},u_{g_{n}}^{\ast}]
\end{array}
\right] =0, \label{P-CAL-E1-5}%
\end{align}
for any $\alpha\in\mathcal{N}_{n}$ and any $z_{2}\in\mathcal{Z}_{2}$. This
means that Assumption \ref{L-SA-1}.(ii) holds for $\psi(z_{2},\alpha)$ with
$\Lambda_{1,n}(z_{2})=0$.\ By definition,%
\begin{align}
\Gamma(\alpha_{o})\left[ h-h_{o},u_{g_{n}}^{\ast}\right] & =\mathbb{E}%
\left[ \tau(w)\partial_{u}g_{o}(w)(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast
}(w)\right] \nonumber\\
& =\mathbb{E}\left[ \tau(w)r_{\psi,h}^{\ast}(Z_{2},\alpha_{o})[h_{o,n}%
-h_{o},u_{g_{n}}^{\ast}]\right] . \label{P-CAL-E1-5a}%
\end{align}
Therefore, Assumption \ref{L-SA-1}.(v) has been verified in (\ref{P-AT-E1-26a}%
) above. By (\ref{A_NPV_3}) and (\ref{P-CAL-E1-2}),
\begin{equation}
\mathbb{E}\left[ \left. \Delta_{\psi}^{\ast}(Z_{2},g_{o},h_{o})[u_{g_{n}%
}^{\ast}]\right\vert Z_{1}\right] =\mathbb{E}\left[ \left. \eta u_{g_{n}%
}^{\ast}(w)\right\vert x,w_{1}\right] =u_{g_{n}}^{\ast}(w)\mathbb{E}\left[
\left. \eta\right\vert x,w_{1}\right] =0 \label{P-CAL-E1-6}%
\end{equation}
which verifies Assumption \ref{L-SA-4}.(iii). By definition,%
\begin{equation}
r_{\psi,h}^{\ast}(z_{2},\alpha)[v_{g},v_{h}]=\partial_{u}g(w_{h})v_{g}v_{h}
\label{P-CAL-E1-7}%
\end{equation}
for any $z_{2}\in\mathcal{Z}_{2}$, any $\alpha\in\mathcal{N}_{\alpha}$, any
$v_{h}\in\mathcal{V}_{1}$ and any $v_{g}\in\mathcal{V}_{2}$, which implies
that Assumption \ref{L-SA-4}.(iv) holds. By the triangle inequality,
(\ref{AA-AP-2}) and (\ref{AL-E6-2}) in the proof of Lemma \ref{AL-E-6}, for
any $\alpha\in\mathcal{N}_{n}$,%
\begin{align}
& \left\vert \Delta_{\psi}(z_{2},g,h)[u_{g_{n}}^{\ast}]-\Delta_{\psi}%
(z_{2},g_{o},h)[u_{g_{n}}^{\ast}]-r_{\psi,g}(z_{2},g_{o},h)[g-g_{o},u_{g_{n}%
}^{\ast}]\right\vert \nonumber\\
& =\left\vert \tau(w_{h})u_{g_{n}}^{\ast}(w)\left( (y-g(w_{h}))-(y-g_{o}%
(w_{h}))+(g(w)-g_{o}(w))\right) \right\vert \nonumber\\
& =\left\vert \tau(w_{h})u_{g_{n}}^{\ast}(w)\left( (g_{o}(w_{h}%
)-g(w_{h}))+(g(w)-g_{o}(w))\right) \right\vert \nonumber\\
& \leq\left\vert \tau(w_{h})u_{g_{n}}^{\ast}(w)\left( (g_{o,K}%
(w_{h})-g(w_{h}))-(g_{o,K}(w)-g(w))\right) \right\vert \nonumber\\
& +\left\vert \tau(w_{h})u_{g_{n}}^{\ast}(w)\left( (g_{o,K}(w_{h}%
)-g_{o}(w_{h}))-(g_{o,K}(w)-g_{o}(w))\right) \right\vert \nonumber\\
& \leq\xi_{1,K}\left\Vert \beta-\beta_{o,K}\right\Vert \left\vert
(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}\right\vert +CK^{-s/d}|u_{g_{n}}^{\ast
}|\nonumber\\
& \leq\xi_{1,K}\delta_{g,n}\left\vert (h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast
}\right\vert +CK^{-s/d}|u_{g_{n}}^{\ast}|. \label{P-CAL-E1-8}%
\end{align}
Let $\Lambda_{3,n}(z_{2},\alpha)=\xi_{1,K}\delta_{g,n}\left\vert
(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}\right\vert +CK^{-s/d}|u_{g_{n}}^{\ast
}|$. By Lemma \ref{AL-E-2}.(a), \ref{AL-E-2}.(f) and Assumption \ref{AA-E-7},
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n}}\sum_{i=1}^{n}\Lambda_{3,n}(Z_{2,i},\alpha
)=o_{p}(n^{-1/2}). \label{P-CAL-E1-9}%
\end{equation}
Similarly, by Lemma \ref{AL-E-2}.(b), \ref{AL-E-2}.(e) and Assumption
\ref{AA-E-7},%
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ \Lambda_{3,n}(Z_{2}%
,\alpha)\right] =o(n^{-1/2}). \label{P-CAL-E1-10}%
\end{equation}
This verifies Assumption \ref{L-SA-4}.(v). By definition,
\begin{equation}
\mathbb{E}\left[ \left. (\Delta_{\psi}^{\ast}(Z_{2},\alpha_{o})[u_{g_{n}%
}^{\ast}])^{2}\right\vert Z_{1}=z_{1}\right] =(u_{g_{n}}^{\ast}%
(w))^{2}\mathbb{E}\left[ \left. \eta^{2}\right\vert Z_{1}=z_{1}\right] \leq
C\xi_{0,K}^{2} \label{P-CAL-E1-11}%
\end{equation}
where the inequality is by Assumption \ref{AA-E-5} and Lemma \ref{AL-E-2}.(g).
By Lemma \ref{AL-E-2}.(d), Assumption \ref{L-SA-4}.(vii) holds with
$\delta_{\tau,n}^{\ast}=\zeta_{L}\delta_{h,n}$, and $\delta_{\tau,n}^{\ast}%
\xi_{0,K}^{2}=o(1)$ follows by Assumption \ref{AA-E-7}.
We next verify Assumption \ref{L-SA-5}. By (\ref{P-CAL-E1-3}), Assumption
\ref{L-SA-5}.(i) holds with $\Lambda_{6,n}(z_{2},\alpha)=0$ for any $z_{2}%
\in\mathcal{Z}_{2}$ and any $\alpha\in\mathcal{N}_{n}$. This also means that
Assumptions \ref{L-SA-5}.(vii)-(viii) hold for $\Lambda_{6,n}%
(z_{2},\alpha)$.\ By Assumption \ref{AA-E-3}, (\ref{P-CAL-E1-2}) and
(\ref{P-CAL-E1-7}),%
\begin{align}
& \left\vert \Delta_{\psi}^{\ast}(z_{2},g_{o},h)[u_{g_{n}}^{\ast}%
]-\Delta_{\psi}^{\ast}(z_{2},\alpha_{o})[u_{g_{n}}^{\ast}]-r_{\psi,h}^{\ast
}(z_{2},\alpha_{o})[h-h_{o},u_{g_{n}}^{\ast}]\right\vert \nonumber\\
& =\left\vert u_{g_{n}}^{\ast}(w)\left( (y-g_{o}(w_{h}))-(y-g_{o}%
(w))+\partial_{u}g_{o}(w)(h(w)-h_{o}(w))\right) \right\vert \nonumber\\
& =\left\vert u_{g_{n}}^{\ast}(w)\left( (g_{o}(w)-g_{o}(w_{h}))-\partial
_{u}g_{o}(w)(h_{o}(w)-h(w))\right) \right\vert \nonumber\\
& \leq C\left\vert u_{g_{n}}^{\ast}(w)(h(w)-h_{o}(w))^{2}\right\vert .
\label{P-CAL-E1-12}%
\end{align}
Let $\Lambda_{7,n}(z_{2},\alpha)=C\left\vert u_{g_{n}}^{\ast}(w)(h(w)-h_{o}%
(w))^{2}\right\vert $. Then by Lemma \ref{AL-E-2}.(a) and \ref{AL-E-2}.(g),
and Assumption \ref{AA-E-7},
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n}}\sum_{i=1}^{n}\Lambda_{7,n}(Z_{2,i},\alpha
)=o_{p}(n^{-1/2}). \label{P-CAL-E1-13}%
\end{equation}
Similarly, by Lemma \ref{AL-E-2}.(b) and \ref{AL-E-2}.(g), and Assumption
\ref{AA-E-7},%
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ \Lambda_{7,n}(Z_{2}%
,\alpha)\right] =o_{p}(n^{-1/2}). \label{P-CAL-E1-14}%
\end{equation}
This shows that Assumptions \ref{L-SA-5}.(ii) and \ref{L-SA-5}.(vii)-(viii)
hold. For any $h\in\mathcal{N}_{h,n}$,
\begin{align}
& \left\vert (\tau(Z_{1},h)-\tau(Z_{1},h_{o}))r_{\psi,h}^{\ast}(Z_{2}%
,\alpha_{o})[h-h_{o},u_{g_{n}}^{\ast}]\right\vert \nonumber\\
& =\left\vert (\tau(w_{h})-\tau(w))\partial_{u}g_{o}(w)(h(w_{1})-h_{o}%
(w_{1}))u_{g_{n}}^{\ast}\right\vert \nonumber\\
& \leq C\zeta_{L}\xi_{0,K}\delta_{h,n}\left\vert \tau(w_{h})-\tau
(w)\right\vert \label{P-CAL-E1-15}%
\end{align}
where the inequality is by Assumption \ref{AA-E-5}, (\ref{PAL-E2-02}) in the
proof of Lemma \ref{AL-E-2}\ and Lemma \ref{AL-E-2}.(g). By (\ref{P-CAL-E1-15}%
),\ Lemma \ref{AL-E-2}.(c) and Assumption \ref{AA-E-7},
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n}}\left\vert \mathbb{E}\left[ (\tau(Z_{1}%
,h)-\tau(Z_{1},h_{o}))r_{\psi,h}^{\ast}(Z_{2},\alpha_{o})[h-h_{o},u_{g_{n}%
}^{\ast}]\right] \right\vert \leq C\zeta_{L}^{2}\xi_{0,K}\delta_{h,n}%
^{2}=o(n^{-1/2}), \label{P-CAL-E1-16}%
\end{equation}
which verifies Assumption \ref{L-SA-5}.(iii). By (\ref{AA-AP-2}) and Lemma
\ref{AL-E-1}.(d), for any $g\in\mathcal{N}_{g,n}$
\begin{align}
& \sup_{w}\left\vert (g(w)-g_{o}(w))u_{g_{n}}^{\ast}(w)\right\vert
\nonumber\\
& =\sup_{w}\left\vert \tau(w)(g(w)-g_{o}(w))u_{g_{n}}^{\ast}(w)\right\vert
\nonumber\\
& \leq\sup_{w\in\mathcal{T}_{w}}\left\vert \tau(w)(g(w)-g_{o,K}(w))u_{g_{n}%
}^{\ast}(w)\right\vert +\sup_{w\in\mathcal{T}_{w}}\left\vert \tau
(w)(g_{o,K}(w)-g_{o}(w))u_{g_{n}}^{\ast}(w)\right\vert \nonumber\\
& \leq\sup_{w}\left\vert u_{g_{n}}^{\ast}(w)\right\vert \sup_{w\in
\mathcal{T}_{w}}\left\vert g(w)-g_{o,K}(w)\right\vert \nonumber\\
& +\sup_{w}\left\vert u_{g_{n}}^{\ast}(w)\right\vert \sup_{w\in
\mathcal{T}_{w}}\left\vert g_{o,K}(w)-g_{o}(w)\right\vert \nonumber\\
& \leq C\xi_{0,K}(\left\Vert \beta-\beta_{o,K}\right\Vert \xi_{0,K}%
+K^{-s/d})\leq C\xi_{0,K}^{2}\delta_{g,n} \label{P-CAL-E1-17}%
\end{align}
where the first equality is by $\tau(w)^{2}=\tau(w)$, the first inequality is
by the triangle inequality, the third inequality is by (\ref{AA-AP-2}) and
Lemma \ref{AL-E-2}.(g), and the last inequality is by the definition of
$\mathcal{N}_{g,n}$. For any $\alpha\in\mathcal{N}_{n}$,
\begin{align}
& \left\vert (\tau(Z_{1},h)-\tau(Z_{1},h_{o}))r_{\psi,g}^{\ast}(Z_{2}%
,\alpha_{o})[g-g_{o},u_{g_{n}}^{\ast}]\right\vert \nonumber\\
& =\left\vert (\tau(w_{h})-\tau(w))(g(w)-g_{o}(w))u_{g_{n}}^{\ast}\right\vert
\nonumber\\
& \leq C\xi_{0,K}^{2}\delta_{g,n}\left\vert \tau(Z_{1},h)-\tau(Z_{1}%
,h_{o})\right\vert \label{P-CAL-E1-18}%
\end{align}
where the inequality is by (\ref{P-CAL-E1-17}) and the definition of
$\mathcal{N}_{n}$. By (\ref{P-CAL-E1-18}),\ Lemma \ref{AL-E-2}.(c) and
Assumption \ref{AA-E-7},%
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n}}\left\vert \mathbb{E}\left[ (\tau(Z_{1}%
,h)-\tau(Z_{1},h_{o}))r_{\psi,g}^{\ast}(Z_{2},\alpha_{o})[g-g_{o},u_{g_{n}%
}^{\ast}]\right] \right\vert \leq C\zeta_{L}\xi_{0,K}^{2}\delta_{h,n}%
\delta_{g,n}=o(n^{-1/2}), \label{P-CAL-E1-19}%
\end{equation}
which verifies Assumption \ref{L-SA-5}.(iv). By (\ref{P-CAL-E1-3}), Assumption
\ref{L-SA-5}.(v) holds with $\Lambda_{8,n}(z_{2},\alpha)=0$ for any $z_{2}%
\in\mathcal{Z}_{2}$ and any $\alpha\in\mathcal{N}_{n}$. By\ (\ref{P-CAL-E1-3})
and Lemma \ref{AL-E-2}.(e), Assumption \ref{L-SA-5}.(vi) also holds.
Assumptions \ref{L-SA-5}.(vii) and \ref{L-SA-5}.(viii) have been verified
together with Assumptions \ref{L-SA-5}.(i) and \ref{L-SA-5}.(ii).
Finally, we verify Assumption \ref{L-SA-6}. Let $h_{o,n}=h_{o,L}$. By
(\ref{P-CAL-E1-7}), (\ref{AA-AP-1}), Assumption \ref{AA-E-7} and Lemma
\ref{AL-E-2}.(e)
\begin{align}
& \mathbb{E}\left[ \left\vert r_{\psi,h}^{\ast}(Z_{2},\alpha_{o}%
)[h_{o,L}-h_{o},u_{g_{n}}^{\ast}]\right\vert \right] \nonumber\\
& =\mathbb{E}\left[ \left\vert \partial_{u}g_{o}(w)(h_{o,L}-h_{o})u_{g_{n}%
}^{\ast}\right\vert \right] \nonumber\\
& \leq C(\mathbb{E}\left[ \left\vert (h_{o,L}-h_{o})^{2}\right\vert \right]
)^{1/2}(\mathbb{E}\left[ \left\vert (u_{g_{n}}^{\ast})^{2}\right\vert
\right] )^{1/2}\nonumber\\
& \leq CL^{-s_{1}/d_{w_{1}}}=o(n^{-1/2}) \label{P-CAL-E1-20}%
\end{align}
which verifies Assumption \ref{L-SA-6}.(i). Assumption \ref{L-SA-6}.(ii) can
be verified using the same arguments of the proof of Lemma \ref{AL-E-3}. Let
$g_{o,n}=g_{o,K}$. By (\ref{P-CAL-E1-3}), (\ref{AA-AP-2}), Assumption
\ref{AA-E-7} and Lemma \ref{AL-E-2}.(e)
\begin{align}
& \mathbb{E}\left[ \left\vert r_{\psi,g}^{\ast}(Z_{2},\alpha_{o}%
)[g_{o,K}-g_{o},u_{g_{n}}^{\ast}]\right\vert \right] \nonumber\\
& =\mathbb{E}\left[ \left\vert (g_{o,K}-g_{o})u_{g_{n}}^{\ast}\right\vert
\right] \nonumber\\
& \leq C(\mathbb{E}\left[ \left\vert (g_{o,K}-g_{o})^{2}\right\vert \right]
)^{1/2}(\mathbb{E}\left[ \left\vert (u_{g_{n}}^{\ast})^{2}\right\vert
\right] )^{1/2}\nonumber\\
& \leq CK^{-s/d}=o(n^{-1/2}) \label{P-CAL-E1-21}%
\end{align}
which verifies Assumption \ref{L-SA-6}.(iii). For any $h\in\mathcal{N}_{h,n}%
$,
\begin{align}
& \mathbb{E}\left[ \tau(Z_{1},h)^{2}(r_{\psi,h}^{\ast}(z_{2},\alpha
_{o})[h-h_{o,L},u_{g_{n}}^{\ast}])^{2}\right] \nonumber\\
& =\mathbb{E}\left[ \tau(w_{h})^{2}(\partial_{u}g_{o}(w)(h-h_{o,L})u_{g_{n}%
}^{\ast})^{2}\right] \nonumber\\
& \leq C\sup_{w_{1}\in\mathcal{W}_{1}}\left\vert (h(w_{1})-h_{o,L}%
(w_{1}))^{2}\right\vert \mathbb{E}\left[ (u_{g_{n}}^{\ast})^{2}\right] \leq
C\zeta_{L}^{2}\delta_{h,n}^{2} \label{P-CAL-E1-22}%
\end{align}
where the first inequality is by $\tau(w_{h})^{2}\leq1$ for any\ $h\in
\mathcal{N}_{h,n}$ and Assumption \ref{AA-E-5}, the last inequality is by
(\ref{PAL-E2-02}) and Lemma \ref{AL-E-2}.(e). Moreover, for any $f\in
\mathcal{F}_{3,n}^{\ast}$,
\begin{equation}
\sup_{z_{2}\in\mathcal{Z}_{2}}\left\vert f(z_{2})\right\vert \leq\left(
\sup_{h\in\mathcal{N}_{h,n}}\sup_{w_{1}\in\mathcal{W}_{1}}\left\vert
(h(w_{1})-h_{o,L}(w_{1}))^{2}\right\vert \right) \left( \sup_{w\in
\mathcal{W}}\left\vert (u_{g_{n}}^{\ast}(w))^{2}\right\vert \right) \leq
C\xi_{0,K}^{2}\zeta_{L}^{2}\delta_{h,n}^{2}, \label{P-CAL-E1-23}%
\end{equation}
which together with (\ref{P-CAL-E1-22}) and Assumption \ref{AA-E-7} implies
that
\begin{align}
& (\sup_{f\in\mathcal{F}_{3,n}^{\ast}}\mathbb{E}\left[ f^{2}\right]
+(K+L)\sup_{z_{2}\in\mathcal{Z}_{2}}|F_{3,n}^{\ast}(z_{2})|\log(n)n^{-1}%
)(K+L)\log(n)\nonumber\\
& \leq C(\zeta_{L}^{2}\delta_{h,n}^{2}(K+L)+(K+L)^{2}\xi_{0,K}^{2}\zeta
_{L}^{2}\delta_{h,n}^{2}\log(n)n^{-1})\log(n)=o(1). \label{P-CAL-E1-24}%
\end{align}
This verifies Assumption \ref{L-SA-6}.(v) for $\mathcal{F}_{3,n}^{\ast}$. For
any $h\in\mathcal{N}_{h,n}\ $and $g\in\mathcal{N}_{g,n}$,
\begin{equation}
\tau(z_{1},h)r_{\psi,g}^{\ast}(z_{2},\alpha_{o})[g-g_{o,K},u_{g_{n}}^{\ast
}]=\tau(w_{h})u_{g_{n}}^{\ast}(w)P(w)^{\prime}(\beta-\beta_{o,K}).
\label{P-CAL-E1-25}%
\end{equation}
Hence Assumption \ref{L-SA-6}.(iv) can be verified using the same arguments as in
the proof of Lemma \ref{AL-E-4}. For any $h\in\mathcal{N}_{h,n}\ $and $g\in\mathcal{N}%
_{g,n}$,
\begin{align}
& \mathbb{E}\left[ \tau(Z_{1},h)^{2}(r_{\psi,g}^{\ast}(z_{2},\alpha
_{o})[g-g_{o,K},u_{g_{n}}^{\ast}])^{2}\right] \nonumber\\
& =\mathbb{E}\left[ \tau(w_{h})^{2}((g-g_{o,K})u_{g_{n}}^{\ast})^{2}\right]
\nonumber\\
& \leq C\sup_{w\in\mathcal{W}}\left\vert \tau(w)(g(w)-g_{o,K}%
(w))^{2}\right\vert \mathbb{E}\left[ (u_{g_{n}}^{\ast})^{2}\right] \leq
C\xi_{0,K}^{2}\delta_{g,n}^{2} \label{P-CAL-E1-26}%
\end{align}
where the first inequality is by $\tau(w)^{2}=\tau(w)$ and $\tau(w_{h})^{2}\leq1$
for any $w$ and any $h\in\mathcal{N}_{h,n}$, the second inequality is by the
definition of $\mathcal{N}_{g,n}$ and Lemma \ref{AL-E-2}.(e). Moreover, for
any $f\in\mathcal{F}_{4,n}^{\ast}$,%
\begin{equation}
\sup_{z_{2}\in\mathcal{Z}_{2}}\left\vert f(z_{2})\right\vert \leq\left(
\sup_{g\in\mathcal{N}_{g,n}}\sup_{w\in\mathcal{T}_{w}}\left\vert
(g(w)-g_{o,K}(w))^{2}\right\vert \right) \left( \sup_{w\in\mathcal{W}%
}\left\vert (u_{g_{n}}^{\ast}(w))^{2}\right\vert \right) \leq C\xi_{0,K}%
^{4}\delta_{g,n}^{2}, \label{P-CAL-E1-27}%
\end{equation}
which together with (\ref{P-CAL-E1-26}) and Assumption \ref{AA-E-7} implies
that
\begin{align}
& (\sup_{f\in\mathcal{F}_{4,n}^{\ast}}\mathbb{E}\left[ f^{2}\right]
+(K+L)\sup_{z_{2}\in\mathcal{Z}_{2}}|F_{4,n}^{\ast}(z_{2})|\log(n)n^{-1}%
)(K+L)\log(n)\nonumber\\
& \leq C(\xi_{0,K}^{2}\delta_{g,n}^{2}(K+L)+(K+L)^{2}\xi_{0,K}^{4}%
\delta_{g,n}^{2}\log(n)n^{-1})\log(n)=o(1). \label{P-CAL-E1-28}%
\end{align}
This verifies Assumption \ref{L-SA-6}.(v) for $\mathcal{F}_{4,n}^{\ast}$.
\end{proof}
\bigskip
\begin{lemma}
\label{AL-E-0} Let $v_{\Gamma}^{\ast}(w_{1})=\mathbb{E}\left[ \tau
(w)v_{g}^{\ast}(w)\partial_{u}g_{o}(w)|w_{1}\right] $.\ Under Assumptions
\ref{AA-E-5} and \ref{AA-E-6}, we have
(a) $\mathbb{E}[\tau(w)|v_{g_{n}}^{\ast}(w)-v_{g}^{\ast}(w)|^{2}]\rightarrow0$
as $K\rightarrow\infty$;
(b) $\mathbb{E}\left[ \eta^{2}\tau(w)(v_{g_{n}}^{\ast}(w))^{2}\right]
\rightarrow\mathbb{E}\left[ \eta^{2}\tau(w)(v_{g}^{\ast}(w))^{2}\right] $ as
$K\rightarrow\infty$;
(c)\ $\mathbb{E}\left[ |v_{\Gamma_{n}}^{\ast}(w_{1})-v_{\Gamma}^{\ast}%
(w_{1})|^{2}\right] \rightarrow0$ as $K\rightarrow\infty$ and $L\rightarrow
\infty$;
(d) $\mathbb{E}\left[ u^{2}(v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}\right]
\rightarrow\mathbb{E}\left[ u^{2}(v_{\Gamma}^{\ast}(w_{1}))^{2}\right] $ as
$K\rightarrow\infty$ and $L\rightarrow\infty$.
\end{lemma}
\begin{proof}
[Proof of Lemma \ref{AL-E-0}](a) By the definition of $v_{g_{n}}^{\ast}$ and
Assumption \ref{AA-E-6},%
\begin{equation}
\mathbb{E}[\tau(w)P(w)(v_{g_{n}}^{\ast}(w)-v_{g}^{\ast}(w))]=\mathbf{0}%
_{K\times1} \label{PAA-E-L1-1}%
\end{equation}
which immediately implies that
\begin{align}
& \mathbb{E}[\tau(w)|P(w)^{\prime}\beta_{g,K}-v_{g}^{\ast}(w)|^{2}%
]\nonumber\\
& =\mathbb{E}[\tau(w)|P(w)^{\prime}\beta_{g,K}-v_{g_{n}}^{\ast}%
(w)|^{2}]+\mathbb{E}[\tau(w)|v_{g_{n}}^{\ast}(w)-v_{g}^{\ast}(w)|^{2}%
]\nonumber\\
& \geq\mathbb{E}[\tau(w)|v_{g_{n}}^{\ast}(w)-v_{g}^{\ast}(w)|^{2}]
\label{PAA-E-L1-2}%
\end{align}
for any $\beta_{g,K}\in\mathbb{R}^{K}$. Hence as $K\rightarrow\infty$,
\begin{equation}
\mathbb{E}[\tau(w)|v_{g_{n}}^{\ast}(w)-v_{g}^{\ast}(w)|^{2}]\leq
\mathbb{E}[\tau(w)|P(w)^{\prime}\beta_{v,K}-v_{g}^{\ast}(w)|^{2}]\rightarrow0,
\label{PAA-E-L1-3}%
\end{equation}
where $\beta_{v,K}$ is defined in Assumption \ref{AA-E-6}.
(b) By Assumption \ref{AA-E-5}, Jensen's inequality and H\"{o}lder's
inequality,%
\begin{align}
& \left\vert \mathbb{E}\left[ \eta^{2}\tau(w)(v_{g_{n}}^{\ast}%
(w)-v_{g}^{\ast}(w))v_{g}^{\ast}(w)\right] \right\vert \nonumber\\
& \leq C\mathbb{E}\left[ \tau(w)\left\vert (v_{g_{n}}^{\ast}(w)-v_{g}^{\ast
}(w))v_{g}^{\ast}(w)\right\vert \right] \nonumber\\
& \leq C(\mathbb{E}\left[ \tau(w)\left\vert (v_{g_{n}}^{\ast}(w)-v_{g}%
^{\ast}(w))^{2}\right\vert \right] \mathbb{E}\left[ \tau(w)(v_{g}^{\ast
}(w))^{2}\right] )^{1/2} \label{PAA-E-L1-4}%
\end{align}
which together with Assumption \ref{AA-E-6} and the result proved in (a)
implies that%
\begin{equation}
\left\vert \mathbb{E}\left[ \eta^{2}\tau(w)(v_{g_{n}}^{\ast}(w)-v_{g}^{\ast
}(w))v_{g}^{\ast}(w)\right] \right\vert \rightarrow0\text{ as }%
K\rightarrow\infty. \label{PAA-E-L1-5}%
\end{equation}
By the triangle inequality,
\begin{align}
& \left\vert \mathbb{E}\left[ \eta^{2}\tau(w)(v_{g_{n}}^{\ast}%
(w))^{2}\right] -\mathbb{E}\left[ \eta^{2}\tau(w)(v_{g}^{\ast}%
(w))^{2}\right] \right\vert \nonumber\\
& \leq\mathbb{E}\left[ \eta^{2}\tau(w)(v_{g_{n}}^{\ast}(w)-v_{g}^{\ast
}(w))^{2}\right] \nonumber\\
& +2\left\vert \mathbb{E}\left[ \eta^{2}\tau(w)(v_{g_{n}}^{\ast}%
(w)-v_{g}^{\ast}(w))v_{g}^{\ast}(w)\right] \right\vert , \label{PAA-E-L1-6}%
\end{align}
which combined with the results in (\ref{PAA-E-L1-3}), (\ref{PAA-E-L1-4}) and
(\ref{PAA-E-L1-5}) proves the claim (b).
(c) Let $v_{\Gamma,L}^{\ast}(w_{1})=R(\cdot)^{\prime}Q_{L}^{-1}\mathbb{E}%
\left[ R(w_{1})\tau(w)\partial_{u}g_{o}(w)v_{g}^{\ast}(w)\right] $. Then%
\begin{equation}
v_{\Gamma_{n}}^{\ast}(w_{1})-v_{\Gamma,L}^{\ast}(w_{1})=R(\cdot)^{\prime}%
Q_{L}^{-1}\mathbb{E}\left[ R(w_{1})\tau(w)\partial_{u}g_{o}(w)(v_{g_{n}%
}^{\ast}(w)-v_{g}^{\ast}(w))\right] . \label{PAA-E-L1-7}%
\end{equation}
By the (matrix) Cauchy-Schwarz inequality, Assumption \ref{AA-E-5} and the
result proved in (a),
\begin{align}
\mathbb{E}\left[ |v_{\Gamma_{n}}^{\ast}(w_{1})-v_{\Gamma,L}^{\ast}%
(w_{1})|^{2}\right] & \leq\mathbb{E}\left[ \tau(w)(\partial_{u}%
g_{o}(w))^{2}(v_{g_{n}}^{\ast}(w)-v_{g}^{\ast}(w))^{2}\right] \nonumber\\
& \leq C\mathbb{E}\left[ \tau(w)(v_{g_{n}}^{\ast}(w)-v_{g}^{\ast}%
(w))^{2}\right] \rightarrow0\text{ } \label{PAA-E-L1-8}%
\end{align}
as $K\rightarrow\infty$. Using the same arguments after display (A.9) of
Newey, Powell and Vella (1999) (their $b_{L}(z)$ and $\rho(z)$ are
$v_{\Gamma,L}^{\ast}(w_{1})$ and $v_{\Gamma}^{\ast}(w_{1})$ here
respectively), we can show that
\begin{equation}
\mathbb{E}\left[ |v_{\Gamma,L}^{\ast}(w_{1})-v_{\Gamma}^{\ast}(w_{1}%
)|^{2}\right] \rightarrow0\text{ as }L\rightarrow\infty\text{.}
\label{PAA-E-L1-9}%
\end{equation}
Combining the results in (\ref{PAA-E-L1-8}) and (\ref{PAA-E-L1-9}), we
immediately prove the claim in (c).
(d) The proof follows similar arguments to those in the proof of claim (b) and hence is omitted.
\end{proof}
Let $\widehat{Q}_{n,L}=n^{-1}R_{n}R_{n}^{\prime}$ and $\widehat{Q}%
_{n,K}=n^{-1}\widehat{P}_{n}^{\prime}\widehat{P}_{n}$, which are the
estimators of $Q_{L}=\mathbb{E}\left[ R(w_{1})R(w_{1})^{\prime}\right] $ and
$Q_{K}=\mathbb{E}\left[ \tau(w)P(w)P(w)^{\prime}\right] $ respectively. The
following Lemma is useful to verify the high-level conditions for the
asymptotic normality. The proofs of the results in Lemmas \ref{AL-E-1}.(a) and
\ref{AL-E-1}.(b) are in Newey (1997) and the proofs of the remaining results
are in\ Newey, Powell and Vella (1999).
\begin{lemma}
\label{AL-E-1} Let $\delta_{h,n}^{\ast}=L^{1/2}n^{-1/2}+L^{-s_{1}/d_{w_{1}}}$
and $\delta_{g,n}^{\ast}=K^{1/2}n^{-1/2}+K^{-s/d}+\delta_{h,n}^{\ast}$. Under
Assumptions \ref{AA-E-1}-\ref{AA-E-4}, we have
(a)\ $||\widehat{Q}_{n,L}-Q_{L}||=O_{p}(\zeta_{L}L^{1/2}n^{-1/2})$;
(b) $||\widehat{\gamma}_{n}-\gamma_{o,L}||=O_{p}(\delta_{h,n}^{\ast})$;
(c) $||\widehat{Q}_{n,K}-Q_{K}||=O_{p}(\xi_{1,K}^{2}(\delta_{h,n}^{\ast}%
)^{2}+K^{1/2}\xi_{1,K}\delta_{h,n}^{\ast}+\xi_{0,K}^{2}\zeta_{L}\delta
_{h,n}^{\ast})$;
(d)\ $||\widehat{\beta}_{n}-\beta_{o,K}||=O_{p}(\delta_{g,n}^{\ast})$;
(e) $n^{-1}\sum\nolimits_{i=1}^{n}\left\vert \widehat{\tau}_{i}-\tau
_{i}\right\vert =O_{p}(\zeta_{L}\delta_{h,n}^{\ast})$.
\end{lemma}
Recall that $\mathcal{N}_{\gamma,n}=\{ \gamma\in\mathbb{R}^{L}$:
$||\gamma-\gamma_{o,L}||\leq\delta_{h,n}\}$ and $\mathcal{N}_{\beta,n}=\{
\beta\in\mathbb{R}^{K}$: $||\beta-\beta_{o,K}||\leq\delta_{g,n}\}$ where
$\delta_{h,n}=\delta_{h,n}^{\ast}\varrho_{n}$, $\delta_{g,n}=\delta
_{g,n}^{\ast}\varrho_{n}$ and $\{ \varrho_{n}\}_{n}$ is a slowly divergent
real positive sequence.\ By Lemma \ref{AL-E-1}.(b) and Lemma \ref{AL-E-1}.(d),
we have $\widehat{\gamma}_{n}\in\mathcal{N}_{\gamma,n}$ and $\widehat{\beta
}_{n}\in\mathcal{N}_{\beta,n}$ wpa1. Define $\mathcal{N}_{h,n}=\{h\left(
\cdot\right) =R\left( \cdot\right) ^{\prime}\gamma$: $\gamma\in
\mathcal{N}_{\gamma,n}\}$ and $\mathcal{N}_{g,n}=\{g\left( \cdot\right)
=P\left( \cdot\right) ^{\prime}\beta$: $\beta\in\mathcal{N}_{\beta,n}\}$.
The following Lemma is useful to verify the high-level conditions.
\begin{lemma}
\label{AL-E-2} Under Assumptions \ref{AA-E-1}-\ref{AA-E-6}, we have
(a)\ $\sup_{\gamma\in\mathcal{N}_{\gamma,n}}n^{-1}\sum_{i=1}^{n}\left[
\left\vert R(w_{1,i})^{\prime}\gamma-h_{o}(w_{1,i})\right\vert ^{2}\right]
=O_{p}(\delta_{h,n}^{2})$;
(b) $\sup_{\gamma\in\mathcal{N}_{\gamma,n}}\mathbb{E}\left[ \left\vert
R(w_{1,i})^{\prime}\gamma-h_{o}(w_{1,i})\right\vert ^{2}\right]
=O(\delta_{h,n}^{2})$;
(c) $\sup_{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ \left\vert \tau(w_{h}%
)-\tau(w)\right\vert \right] \leq C\zeta_{L}\delta_{h,n}$;
(d) $\sup_{h\in\mathcal{N}_{h,n}}n^{-1}\sum\nolimits_{i=1}^{n}\left[
\left\vert \tau(w_{h,i})-\tau(w_{i})\right\vert \right] =O_{p}(\zeta
_{L}\delta_{h,n})$;
(e) $\mathbb{E}\left[ (v_{g_{n}}^{\ast}(w))^{2}\right] \leq C\left\Vert
v_{n}^{\ast}\right\Vert _{sd}^{2}$;
(f) $n^{-1}\sum_{i=1}^{n}(v_{g_{n}}^{\ast}(w_{i}))^{2}\left\Vert v_{n}^{\ast
}\right\Vert _{sd}^{-2}=O_{p}(1)$;
(g) $\sup_{w}\left\vert v_{g_{n}}^{\ast}(w)\left\Vert v_{n}^{\ast}\right\Vert
_{sd}^{-1}\right\vert \leq C\xi_{0,K}$;
(h) $\mathbb{E}\left[ \left\vert v_{\Gamma_{n}}^{\ast}(w_{1})\right\vert
^{2}\right] \leq C\left\Vert v_{n}^{\ast}\right\Vert _{sd}^{2}$.
\end{lemma}
\begin{proof}
[Proof of Lemma \ref{AL-E-2}]Following Newey (1997) we assume without loss of
generality that $Q_{L}=I_{L}$ and $Q_{K}=I_{K}$. Such an assumption can be
verified under Assumption \ref{AA-E-2} for the power series and splines using
the arguments in the proof of Theorem 4 and Theorem 7 of Newey (1997) respectively.
(a) By Assumption \ref{AA-E-4}, Lemma \ref{AL-E-1}.(a), $Q_{L}=I_{L}$,\ the
Cauchy-Schwarz inequality, the definition of $\mathcal{N}_{\gamma,n}$ and
(\ref{AA-AP-1}),%
\begin{align*}
& \sup_{\gamma\in\mathcal{N}_{\gamma,n}}n^{-1}\sum_{i=1}^{n}\left[
\left\vert R(w_{1,i})^{\prime}\gamma-h_{o}(w_{1,i})\right\vert ^{2}\right] \\
& \leq2\sup_{\gamma\in\mathcal{N}_{\gamma,n}}n^{-1}\sum_{i=1}^{n}\left[
\left\vert R(w_{1,i})^{\prime}\gamma-h_{o,L}(w_{1,i})\right\vert ^{2}\right]
+2\sup_{\gamma\in\mathcal{N}_{\gamma,n}}n^{-1}\sum_{i=1}^{n}\left[ \left\vert
h_{o,L}(w_{1,i})-h_{o}(w_{1,i})\right\vert ^{2}\right] \\
& \leq2\sup_{\gamma\in\mathcal{N}_{\gamma,n}}(\gamma-\gamma_{o,L})^{\prime
}\widehat{Q}_{n,L}(\gamma-\gamma_{o,L})+2CL^{-2s_{1}/d_{w_{1}}}\\
& \leq2\omega_{\max}(\widehat{Q}_{n,L})\sup_{\gamma\in\mathcal{N}_{\gamma,n}%
}\left\Vert \gamma-\gamma_{o,L}\right\Vert ^{2}+2CL^{-2s_{1}/d_{w_{1}}}%
=O_{p}(\delta_{h,n}^{2}),
\end{align*}
which proves the claim in (a).
(b)\ The proof follows similar arguments to those in the proof of (a) and is omitted.
(c) For any $h\left( \cdot\right) =R(\cdot)^{\prime}\gamma\in\mathcal{N}%
_{h,n}$,
\begin{align}
\left\vert h(w_{1})-h_{o}(w_{1})\right\vert & \leq\left\vert R(w_{1}%
)^{\prime}\gamma-h_{o,L}(w_{1})\right\vert +\left\vert h_{o,L}(w_{1}%
)-h_{o}(w_{1})\right\vert \nonumber\\
& \leq\zeta_{L}\left\Vert \gamma-\gamma_{o,L}\right\Vert +CL^{-s_{1}%
/d_{w_{1}}}\leq C\zeta_{L}\delta_{h,n} \label{PAL-E2-02}%
\end{align}
which implies that%
\begin{align}
\left\vert \tau(w_{h})-\tau(w)\right\vert & \leq\left\vert I\left\{ u\leq
b+R(w_{1})^{\prime}\gamma-h_{o}(w_{1})\right\} -I\{u\leq b\} \right\vert
\nonumber\\
& +\left\vert I\left\{ u\geq a+R(w_{1})^{\prime}\gamma-h_{o}(w_{1})\right\}
-I\{u\geq a\} \right\vert \nonumber\\
& \leq I\left\{ \left\vert u-b\right\vert \leq\left\vert R(w_{1})^{\prime
}\gamma-h_{o}(w_{1})\right\vert \right\} \nonumber\\
& +I\left\{ \left\vert u-a\right\vert \leq\left\vert R(w_{1})^{\prime}%
\gamma-h_{o}(w_{1})\right\vert \right\} \nonumber\\
& \leq I\left\{ \left\vert u-b\right\vert \leq C\zeta_{L}\delta
_{h,n}\right\} +I\left\{ \left\vert u-a\right\vert \leq C\zeta_{L}%
\delta_{h,n}\right\} , \label{PAL-E2-2}%
\end{align}
where $\zeta_{L}\delta_{h,n}=o(1)$ by Assumption \ref{AA-E-7}. As the density
of $u$ is bounded in the local neighborhoods of $a$ and $b$ (which is assumed
in Lemma A3 of Newey, Powell and Vella (1999)), by (\ref{PAL-E2-2}) we get
\begin{equation}
\mathbb{E}\left[ \sup_{h\in\mathcal{N}_{h,n}}\left\vert \tau(w_{h}%
)-\tau(w)\right\vert \right] \leq C\zeta_{L}\delta_{h,n} \label{PAL-E2-3}%
\end{equation}
which finishes the proof.
(d) By (\ref{PAL-E2-3}) and the Markov inequality we immediately get the
asserted result.
(e)\ By the definition of $\eta$ and Assumption \ref{AA-E-5}, $\mathbb{E}%
\left[ \eta^{2}|x,w_{1}\right] \geq C_{\eta}$ where $C_{\eta}$ is a finite
positive constant.\ Thus
\begin{align}
\mathbb{E}[(v_{g_{n}}^{\ast}(w))^{2}\left\Vert v_{n}^{\ast}\right\Vert
_{sd}^{-2}] & =\frac{\mathbb{E}[(v_{g_{n}}^{\ast}(w))^{2}]}{\mathbb{E}%
\left[ u^{2}(v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}\right] +\mathbb{E}\left[
\eta^{2}\tau(w)(v_{g_{n}}^{\ast}(w))^{2}\right] }\nonumber\\
& \leq\frac{\mathbb{E}[(v_{g_{n}}^{\ast}(w))^{2}]}{\mathbb{E}\left[
u^{2}(v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}\right] +C_{\eta}\mathbb{E}\left[
\tau(w)(v_{g_{n}}^{\ast}(w))^{2}\right] }\nonumber\\
& =\frac{\mathbb{E}[\tau(w)(v_{g_{n}}^{\ast}(w))^{2}]}{\mathbb{E}\left[
u^{2}(v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}\right] +C_{\eta}\mathbb{E}\left[
\tau(w)(v_{g_{n}}^{\ast}(w))^{2}\right] }\leq C_{\eta}^{-1} \label{PAL-E2-4}%
\end{align}
where the second equality is by the definition of $v_{g_{n}}^{\ast}$ and
$\tau(w)^{2}=\tau(w)$.
(f) The asserted result follows by (e) and the Markov inequality.
(g) By the Cauchy-Schwarz inequality and Assumption \ref{AA-E-5},
\begin{align}
\left\vert v_{g_{n}}^{\ast}(w)\right\vert ^{2}\left\Vert v_{n}^{\ast
}\right\Vert _{sd}^{-2} & =\frac{\left\vert \tau(w)P(w)^{\prime}Q_{K}%
^{-1}\rho(P_{K})\right\vert ^{2}}{\mathbb{E}\left[ u^{2}(v_{\Gamma_{n}}%
^{\ast}(w_{1}))^{2}\right] +\mathbb{E}\left[ \eta^{2}\tau(w)(v_{g_{n}}%
^{\ast}(w))^{2}\right] }\nonumber\\
& \leq\frac{\rho(P_{K})^{\prime}Q_{K}^{-2}\rho(P_{K})\left\Vert
\tau(w)P(w)\right\Vert ^{2}}{\mathbb{E}\left[ u^{2}(v_{\Gamma_{n}}^{\ast
}(w_{1}))^{2}\right] +C_{\eta}\mathbb{E}\left[ \tau(w)(v_{g_{n}}^{\ast
}(w))^{2}\right] }\nonumber\\
& \leq\frac{\xi_{0,K}^{2}\rho(P_{K})^{\prime}Q_{K}^{-2}\rho(P_{K}%
)}{\mathbb{E}\left[ u^{2}(v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}\right] +C_{\eta
}\mathbb{E}\left[ \tau(w)(v_{g_{n}}^{\ast}(w))^{2}\right] }\nonumber\\
& \leq\frac{\xi_{0,K}^{2}\omega_{\min}^{-1}(Q_{K})\mathbb{E}\left[
\tau(w)(v_{g_{n}}^{\ast}(w))^{2}\right] }{\mathbb{E}\left[ u^{2}%
(v_{\Gamma_{n}}^{\ast}(w_{1}))^{2}\right] +C_{\eta}\mathbb{E}\left[
\tau(w)(v_{g_{n}}^{\ast}(w))^{2}\right] }\nonumber\\
& \leq\xi_{0,K}^{2}\omega_{\min}^{-1}(Q_{K})C_{\eta}^{-1} \label{PAL-E2-5}%
\end{align}
for any $w$. This combined with $Q_{K}=I_{K}$ immediately proves the claim.
(h) By Lemmas \ref{AL-E-0}.(b) and \ref{AL-E-0}.(d),
\begin{equation}
\mathbb{E}[\left\vert v_{\Gamma_{n}}^{\ast}(w_{1})\right\vert ^{2}\left\Vert
v_{n}^{\ast}\right\Vert _{sd}^{-2}]\rightarrow\frac{\mathbb{E}\left[
(v_{\Gamma}^{\ast}(w_{1}))^{2}\right] }{\mathbb{E}\left[ \eta^{2}%
\tau(w)(v_{g}^{\ast}(w))^{2}\right] +\mathbb{E}\left[ u^{2}(v_{\Gamma}%
^{\ast}(w_{1}))^{2}\right] } \label{PAL-E2-6}%
\end{equation}
as $K\rightarrow\infty$ and $L\rightarrow\infty$, where $v_{\Gamma}^{\ast
}(w_{1})=\mathbb{E}\left[ \tau(w)v_{g}^{\ast}(w)\partial_{u}g_{o}%
(w)|w_{1}\right] $. By Assumption \ref{AA-E-5} and Jensen's inequality,%
\begin{equation}
\mathbb{E}\left[ (v_{\Gamma}^{\ast}(w_{1}))^{2}\right] \leq C\mathbb{E}%
\left[ (\mathbb{E}\left[ \tau(w)v_{g}^{\ast}(w)|w_{1}\right] )^{2}\right]
\leq C\mathbb{E}\left[ \tau(w)(v_{g}^{\ast}(w))^{2}\right] .
\label{PAL-E2-7}%
\end{equation}
By Assumption \ref{AA-E-5}, $\mathbb{E}\left[ \eta^{2}|x,w_{1}\right] \geq
C_{\eta}$ where $C_{\eta}$ is a finite positive constant, which together with
(\ref{PAL-E2-7}) implies that
\begin{align}
& \frac{\mathbb{E}\left[ \tau(w)(v_{g}^{\ast}(w))^{2}\right] }%
{\mathbb{E}\left[ \eta^{2}\tau(w)(v_{g}^{\ast}(w))^{2}\right] +\mathbb{E}%
\left[ u^{2}(v_{\Gamma}^{\ast}(w_{1}))^{2}\right] }\nonumber\\
& \leq\frac{\mathbb{E}\left[ \tau(w)(v_{g}^{\ast}(w))^{2}\right] }{C_{\eta
}\mathbb{E}\left[ \tau(w)(v_{g}^{\ast}(w))^{2}\right] +\mathbb{E}\left[
u^{2}(v_{\Gamma}^{\ast}(w_{1}))^{2}\right] }\leq C_{\eta}^{-1}.
\label{PAL-E2-8}%
\end{align}
The asserted claim follows from (\ref{PAL-E2-6}) and (\ref{PAL-E2-8}).
\end{proof}
\bigskip
\begin{lemma}
\label{AL-E-3} Define $\mathcal{F}_{1,n}=\{(x,w_{1})\mapsto\partial_{u}%
g_{o}(w)\tau(w_{h})(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w):h\in
\mathcal{N}_{h,n}\}$. Then the uniform entropy numbers of $\mathcal{F}_{1,n}$
satisfy%
\begin{equation}
\sup_{Q}N(\varepsilon\left\Vert F_{1,n}\right\Vert _{Q,2},\mathcal{F}%
_{1,n},L_{2}(Q))\leq(C/\varepsilon)^{CL}\ \text{for any }\varepsilon\in(0,1],
\label{P-SQRL1-1}%
\end{equation}
where $C$ is a finite fixed constant, $Q$ ranges over all finitely discrete
probability measures and $F_{1,n}$ denotes the envelope of $\mathcal{F}%
_{1,n}$.
\end{lemma}
\begin{proof}
[Proof of Lemma \ref{AL-E-3}]Let $\tau(x,w_{2})=\prod\nolimits_{j=1}%
^{d_{w_{2}}+1}I\{a_{j}\leq w_{j}\leq b_{j}\}$, $a=a_{d_{w_{2}}+2}$ and
$b=b_{d_{w_{2}}+2}$. Then by definition,
\begin{equation}
\tau(w_{h})=\tau(x,w_{2})I\{a\leq x-h(w_{1})\leq b\}. \label{AL-E3-0}%
\end{equation}
Define
\begin{align}
\mathcal{F}_{11,n} & =\{(x,w_{1})\mapsto I\{x\leq b+R(w_{1})^{\prime}%
\gamma\}:\gamma\in\mathcal{N}_{\gamma,n}\};\label{AL-E3-1}\\
\mathcal{F}_{12,n} & =\{(x,w_{1})\mapsto I\{x\geq a+R(w_{1})^{\prime}%
\gamma\}:\gamma\in\mathcal{N}_{\gamma,n}\};\label{AL-E3-2}\\
\mathcal{F}_{13,n} & =\{(x,w_{1})\mapsto\tau(x,w_{2})\partial_{u}%
g_{o}(w)(R(w_{1})^{\prime}\gamma-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w):\gamma
\in\mathcal{N}_{\gamma,n}\}. \label{AL-E3-3}%
\end{align}
Then by Lemmas 2.6.15 and 2.6.18 in van der Vaart and Wellner (1996), the
VC-dimensions of $\mathcal{F}_{11,n}$, $\mathcal{F}_{12,n}$ and $\mathcal{F}%
_{13,n}$ are of order $L$. By\ Theorem 2.6.7 in van der Vaart and Wellner
(1996), the uniform entropy number of $\mathcal{F}_{1j,n}$ satisfies
\begin{equation}
\sup_{Q}N(\varepsilon\left\Vert F_{1j,n}\right\Vert _{Q,2},\mathcal{F}%
_{1j,n},L_{2}(Q))\leq(C/\varepsilon)^{CL}\ \text{for any }\varepsilon\in(0,1],
\label{AL-E3-4}%
\end{equation}
where $C$ is a universal constant and $F_{1j,n}$ denotes the envelope of
$\mathcal{F}_{1j,n}$ for $j=1,2,3$.\ Because
\begin{equation}
\mathcal{F}_{1,n}\subset\{f_{1}f_{2}f_{3}:f_{1}\in\mathcal{F}_{11,n},f_{2}%
\in\mathcal{F}_{12,n},f_{3}\in\mathcal{F}_{13,n}\}, \label{AL-E3-5}%
\end{equation}
by (A.6) and (A.7) in Andrews (1994),
\begin{align}
& \sup_{Q}N(\varepsilon\left\Vert F_{11,n}F_{12,n}F_{13,n}\right\Vert
_{Q,2},\mathcal{F}_{1,n},L_{2}(Q))\nonumber\\
& \leq\prod\nolimits_{j=1}^{3}\sup_{Q}N(\varepsilon\left\Vert F_{1j,n}%
\right\Vert _{Q,2}/3,\mathcal{F}_{1j,n},L_{2}(Q))\leq(C/\varepsilon)^{CL}
\label{AL-E3-6}%
\end{align}
where the second inequality is by (\ref{AL-E3-4}). This proves
(\ref{P-SQRL1-1}) with $F_{1,n}=F_{11,n}F_{12,n}F_{13,n}$.
\end{proof}
\bigskip
\begin{lemma}
\label{AL-E-4} Define $\mathcal{F}_{2,n}=\{(x,w_{1})\mapsto\tau(w_{h}%
)u_{g_{n}}^{\ast}(w)P(w)^{\prime}\alpha:h\in\mathcal{N}_{h,n}$, $\alpha
\in\mathbb{S}^{K-1}\}$, where $\mathbb{S}^{K-1}=\{ \alpha\in\mathbb{R}%
^{K}:\alpha^{\prime}\alpha=1\}$. Then the uniform entropy numbers of
$\mathcal{F}_{2,n}$ satisfy%
\begin{equation}
\sup_{Q}N(\varepsilon\left\Vert F_{2,n}\right\Vert _{Q,2},\mathcal{F}%
_{2,n},L_{2}(Q))\leq(C/\varepsilon)^{C(L+K)}\ \text{for any }\varepsilon
\in(0,1], \label{P-SQRL1-2}%
\end{equation}
where $C$ is a finite fixed constant, $Q$ ranges over all finitely discrete
probability measures and $F_{2,n}$ denotes the envelope of $\mathcal{F}%
_{2,n}$.
\end{lemma}
\begin{proof}
[Proof of Lemma \ref{AL-E-4}]Define%
\begin{equation}
\mathcal{F}_{21,n}=\{(x,w_{1})\mapsto\tau(x,w_{2})u_{g_{n}}^{\ast
}(w)P(w)^{\prime}\alpha:\alpha\in\mathbb{S}^{K-1}\}, \label{AL-E4-1}%
\end{equation}
where $\tau(x,w_{2})$ is defined in the proof of Lemma \ref{AL-E-3}. Then by
Lemmas 2.6.15 and 2.6.18 in van der Vaart and Wellner (1996), the VC-dimension
of $\mathcal{F}_{21,n}$ is of order $K$. By\ Theorem 2.6.7 in van der Vaart
and Wellner (1996), the uniform entropy number of $\mathcal{F}_{21,n}$
satisfies
\begin{equation}
\sup_{Q}N(\varepsilon\left\Vert F_{21,n}\right\Vert _{Q,2},\mathcal{F}%
_{21,n},L_{2}(Q))\leq(C/\varepsilon)^{CK}\ \text{for any }\varepsilon\in(0,1],
\label{AL-E4-2}%
\end{equation}
where $C$ is a universal constant and $F_{21,n}$ denotes the envelope of
$\mathcal{F}_{21,n}$. The rest of the proof is the same as that of Lemma \ref{AL-E-3},
because
\begin{equation}
\mathcal{F}_{2,n}\subset\{f_{1}f_{2}f_{3}:f_{1}\in\mathcal{F}_{11,n},f_{2}%
\in\mathcal{F}_{12,n},f_{3}\in\mathcal{F}_{21,n}\}, \label{AL-E4-3}%
\end{equation}
where $\mathcal{F}_{11,n}$ and $\mathcal{F}_{12,n}$ are defined in
(\ref{AL-E3-1}) and (\ref{AL-E3-2}) respectively. Hence (\ref{P-SQRL1-2})
holds with $F_{2,n}=F_{11,n}F_{12,n}F_{21,n}$.
\end{proof}
\bigskip
\begin{lemma}
\label{AL-E-5} Under Assumptions \ref{AA-E-1}-\ref{AA-E-7},
\[
\sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ \tau(w_{h}%
)(g_{o}(w)-g_{o}(w_{h}))u_{g_{n}}^{\ast}(w)\right\} \right\vert
=o_{p}(n^{-1/2}).
\]
\end{lemma}
\begin{proof}
[Proof of Lemma \ref{AL-E-5}]Let $u_{h}=x-h(w_{1})$. As $u=x-h_{o}(w_{1})$, we
have $u-u_{h}=h(w_{1})-h_{o}(w_{1})$ by definition.
By\ Assumption\ \ref{AA-E-5},
\begin{equation}
\left\vert g_{o}(w)-g_{o}(w_{h})-\partial_{u}g_{o}(w)(h(w_{1})-h_{o}%
(w_{1}))\right\vert \leq C\left\vert h(w_{1})-h_{o}(w_{1})\right\vert ^{2}
\label{AL-E5-1}%
\end{equation}
which together with the triangle inequality, Lemmas \ref{AL-E-2}.(a),
\ref{AL-E-2}.(b)\ and \ref{AL-E-2}.(g) implies that
\begin{align}
& \sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ \tau(w_{h}%
)(g_{o}(w)-g_{o}(w_{h})-\partial_{u}g_{o}(w)(h(w_{1})-h_{o}(w_{1})))u_{g_{n}%
}^{\ast}(w)\right\} \right\vert \nonumber\\
& \leq C\sup_{h\in\mathcal{N}_{h,n}}n^{-1}\sum_{i=1}^{n}\left[ \left\vert
h(w_{1,i})-h_{o}(w_{1,i})\right\vert ^{2}|u_{g_{n}}^{\ast}(w_{i})|\right]
\nonumber\\
& +C\sup_{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ \left\vert h(w_{1}%
)-h_{o}(w_{1})\right\vert ^{2}|u_{g_{n}}^{\ast}(w)|\right] \nonumber\\
& \leq C\sup_{w}\left\vert u_{g_{n}}^{\ast}(w)\right\vert \sup_{h\in
\mathcal{N}_{h,n}}n^{-1}\sum_{i=1}^{n}\left[ \left\vert h(w_{1,i}%
)-h_{o}(w_{1,i})\right\vert ^{2}\right] \nonumber\\
& +C\sup_{w}\left\vert u_{g_{n}}^{\ast}(w)\right\vert \sup_{h\in
\mathcal{N}_{h,n}}\mathbb{E}\left[ \left\vert h(w_{1,i})-h_{o}(w_{1,i}%
)\right\vert ^{2}\right] =O_{p}(\xi_{0,K}\delta_{h,n}^{2}).
\label{AL-E5-2}%
\end{align}
By Assumption \ref{AA-E-7}, $\xi_{0,K}\delta_{h,n}^{2}=o(n^{-1/2})$. Hence by
(\ref{AL-E5-2}) we have%
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ \tau(w_{h}%
)(g_{o}(w)-g_{o}(w_{h})-\partial_{u}g_{o}(w)(h(w_{1})-h_{o}(w_{1})))u_{g_{n}%
}^{\ast}(w)\right\} \right\vert =o_{p}(n^{-1/2}). \label{AL-E5-3}%
\end{equation}
We next show that
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ \tau(w_{h})\partial
_{u}g_{o}(w)(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w)\right\} \right\vert
=o_{p}(n^{-1/2}). \label{AL-E5-4}%
\end{equation}
Let $\mathcal{F}_{1,n}=\{(x,w_{1})\mapsto\partial_{u}g_{o}(w)\tau
(w_{h})(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w):h\in\mathcal{N}_{h,n}%
\}$.\ By Assumption \ref{AA-E-5}, Lemmas \ref{AL-E-2}.(b)\ and \ref{AL-E-2}%
.(g),%
\begin{align}
\sup_{f\in\mathcal{F}_{1,n}}\mathbb{E}\left[ f^{2}\right] & =\sup
_{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ (\partial_{u}g_{o}(w)\tau
(w_{h})(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w))^{2}\right] \nonumber\\
& \leq C\sup_{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ ((h(w_{1})-h_{o}%
(w_{1}))u_{g_{n}}^{\ast}(w))^{2}\right] \nonumber\\
& \leq C\sup_{w}\left\vert u_{g_{n}}^{\ast}(w)\right\vert ^{2}\sup
_{h\in\mathcal{N}_{h,n}}\mathbb{E}\left[ (h(w_{1})-h_{o}(w_{1}))^{2}\right]
\leq C\xi_{0,K}^{2}\delta_{h,n}^{2}. \label{AL-E5-5}%
\end{align}
Moreover, by the definition of $\mathcal{N}_{\gamma,n}$, (\ref{AA-AP-1}),
Assumption \ref{AA-E-5}, Lemmas \ref{AL-E-2}.(b)\ and \ref{AL-E-2}.(g),
\begin{align}
& \sup_{h\in\mathcal{N}_{h,n}}\left\vert \partial_{u}g_{o}(w)\tau
(w_{h})(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w)\right\vert \nonumber\\
& \leq C\sup_{h\in\mathcal{N}_{h,n}}\left\vert (h(w_{1})-h_{o}(w_{1}%
))u_{g_{n}}^{\ast}(w)\right\vert \nonumber\\
& \leq C\sup_{w}\left\vert u_{g_{n}}^{\ast}(w)\right\vert \sup_{h\in
\mathcal{N}_{h,n}}\left[ \left\vert h(w_{1})-h_{o,L}(w_{1})\right\vert
+\left\vert h_{o,L}(w_{1})-h_{o}(w_{1})\right\vert \right] \nonumber\\
& \leq C\sup_{w}\left\vert u_{g_{n}}^{\ast}(w)\right\vert \sup_{\gamma
\in\mathcal{N}_{\gamma,n}}\left[ \xi_{0,K}\left\Vert \gamma-\gamma
_{o,L}\right\Vert +CL^{-s_{1}/d_{w_{1}}}\right] \leq C\xi_{0,K}^{2}%
\delta_{h,n}. \label{AL-E5-6}%
\end{align}
By Assumption \ref{AA-E-7},%
\begin{equation}
L\xi_{0,K}^{2}\delta_{h,n}^{2}\log(n)+\xi_{0,K}^{2}\delta_{h,n}L^{2}%
(\log(n))^{2}n^{-1}=o(1). \label{AL-E5-7}%
\end{equation}
Collecting the results in Lemma \ref{AL-E-3}, (\ref{AL-E5-5}), (\ref{AL-E5-6})
and (\ref{AL-E5-7}), we can use Lemma 22 of Belloni et al.\ (2016) to show
that
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n}}\left\vert \mu_{n}\left\{ \partial_{u}%
g_{o}(w)\tau(w_{h})(h(w_{1})-h_{o}(w_{1}))u_{g_{n}}^{\ast}(w)\right\}
\right\vert =o_{p}(n^{-1/2}). \label{AL-E5-8}%
\end{equation}
The asserted result follows by (\ref{AL-E5-3}), (\ref{AL-E5-8}) and the
triangle inequality.
\end{proof}
\bigskip
\begin{lemma}
\label{AL-E-6} Under Assumptions \ref{AA-E-1}-\ref{AA-E-7},
\[
\sup_{h\in\mathcal{N}_{h,n},g\in\mathcal{N}_{g,n}}\left\vert \mu_{n}\left\{
\tau(w_{h})(g_{o}(w_{h})-g(w_{h}))u_{g_{n}}^{\ast}\right\} \right\vert
=o_{p}(n^{-1/2}).
\]
\end{lemma}
\begin{proof}
[Proof of Lemma \ref{AL-E-6}]By the triangle inequality, (\ref{AA-AP-2}) and
Lemmas \ref{AL-E-2}.(e)-(f)
\begin{align}
& \sup_{h\in\mathcal{N}_{h,n},g\in\mathcal{N}_{g,n}}\left\vert \mu
_{n}\left\{ \tau(w_{h})(g_{o}(w_{h})-g_{o,K}(w_{h}))u_{g_{n}}^{\ast
}(w)\right\} \right\vert \nonumber\\
& \leq CK^{-s/d}n^{-1}\sum_{i=1}^{n}\left[ \left\vert u_{g_{n}}^{\ast}%
(w_{i})\right\vert +\mathbb{E}\left[ \left\vert u_{g_{n}}^{\ast}%
(w_{i})\right\vert \right] \right] =o_{p}(n^{-1/2}), \label{AL-E6-1}%
\end{align}
where the equality is by Assumption \ref{AA-E-7}.\ By the first order
expansion and the Cauchy-Schwarz inequality, for any $g\in\mathcal{N}_{g,n}$,%
\begin{align}
& \left\vert \tau(w_{h})(g_{o,K}(w_{h})-g(w_{h})-g_{o,K}(w)+g(w))u_{g_{n}%
}^{\ast}(w)\right\vert \nonumber\\
& =\left\vert \tau(w_{h})(\beta-\beta_{o,K})^{\prime}(P(w_{h})-P(w))u_{g_{n}%
}^{\ast}(w)\right\vert \nonumber\\
& \leq\xi_{1,K}\left\Vert \beta-\beta_{o,K}\right\Vert \left\vert u_{g_{n}%
}^{\ast}(w)(h(w_{1})-h_{o}(w_{1}))\right\vert \label{AL-E6-2}%
\end{align}
which together with the definition of $\mathcal{N}_{h,n}$, the triangle
inequality and Lemmas \ref{AL-E-2}.(a) and \ref{AL-E-2}.(f)\ implies that%
\begin{align}
& \sup_{h\in\mathcal{N}_{h,n},g\in\mathcal{N}_{g,n}}n^{-1}\sum_{i=1}%
^{n}\left\vert \tau(w_{i,h})(g_{o,K}(w_{i,h})-g(w_{i,h})-g_{o,K}%
(w_{i})+g(w_{i}))u_{g_{n}}^{\ast}(w_{i})\right\vert \nonumber\\
& \leq\xi_{1,K}\delta_{g,n}\sup_{h\in\mathcal{N}_{h,n}}n^{-1}\sum_{i=1}%
^{n}\left\vert u_{g_{n}}^{\ast}(w_{i})(h(w_{1,i})-h_{o}(w_{1,i}))\right\vert
\nonumber\\
& \leq\xi_{1,K}\delta_{g,n}\sup_{h\in\mathcal{N}_{h,n}}\left( n^{-1}%
\sum_{i=1}^{n}\left\vert u_{g_{n}}^{\ast}(w_{i})\right\vert ^{2}n^{-1}%
\sum_{i=1}^{n}\left\vert h(w_{1,i})-h_{o}(w_{1,i})\right\vert ^{2}\right)
^{1/2}\nonumber\\
& =O_{p}(\xi_{1,K}\delta_{g,n}\delta_{h,n})=o_{p}(n^{-1/2}) \label{AL-E6-3}%
\end{align}
where the equality is by Assumption \ref{AA-E-7}. Similarly, we can show that
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n},g\in\mathcal{N}_{g,n}}\mathbb{E}\left[ \left\vert
\tau(w_{i,h})(g_{o,K}(w_{i,h})-g(w_{i,h})-g_{o,K}(w_{i})+g(w_{i}))u_{g_{n}%
}^{\ast}(w_{i})\right\vert \right] =o(n^{-1/2}), \label{AL-E6-4}%
\end{equation}
which together with (\ref{AL-E6-3}) implies that
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n},g\in\mathcal{N}_{g,n}}\left\vert \mu_{n}\left\{
\tau(w_{h})(g_{o,K}(w_{h})-g(w_{h})-g_{o,K}(w)+g(w))u_{g_{n}}^{\ast
}(w)\right\} \right\vert =o_{p}(n^{-1/2}). \label{AL-E6-5}%
\end{equation}
Recall that $\mathcal{F}_{2,n}=\{(x,w_{1})\mapsto\tau(w_{h})u_{g_{n}}^{\ast
}(w)P(w)^{\prime}\alpha:h\in\mathcal{N}_{h,n}$, $\alpha\in\mathbb{S}^{K-1}\}$,
where $\mathbb{S}^{K-1}=\{ \alpha\in\mathbb{R}^{K}:\alpha^{\prime}\alpha=1\}$.
By Lemma \ref{AL-E-2}.(g) and $\tau(w)^{2}=\tau(w)$,
\begin{align}
\sup_{f\in\mathcal{F}_{2,n}}\mathbb{E}\left[ f^{2}\right] & =\sup
_{h\in\mathcal{N}_{h,n},\alpha\in\mathbb{S}^{K-1}}\mathbb{E}\left[
(\tau(w_{h})u_{g_{n}}^{\ast}(w)P(w)^{\prime}\alpha)^{2}\right] \nonumber\\
& \leq\sup_{w}(u_{g_{n}}^{\ast}(w))^{2}\sup_{\alpha\in\mathbb{S}^{K-1}%
}\mathbb{E}\left[ (\tau(w)P(w)^{\prime}\alpha)^{2}\right] \leq C\xi
_{0,K}^{2}. \label{AL-E6-6}%
\end{align}
Similarly,%
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n},\alpha\in\mathbb{S}^{K-1}}\left\vert \tau
(w_{h})u_{g_{n}}^{\ast}(w)P(w)^{\prime}\alpha\right\vert \leq\sup_{\alpha
\in\mathbb{S}^{K-1}}\left\vert u_{g_{n}}^{\ast}(w)P(w)^{\prime}\alpha
\right\vert \leq C\xi_{0,K}^{2}. \label{AL-E6-7}%
\end{equation}
Collecting the results in Lemma \ref{AL-E-4}, (\ref{AL-E6-6}) and
(\ref{AL-E6-7}), we can use Lemma 22 of Belloni et al.\ (2016) to show that
\begin{equation}
\sup_{h\in\mathcal{N}_{h,n},\alpha\in\mathbb{S}^{K-1}}\left\vert \mu
_{n}\left\{ \tau(w_{h})u_{g_{n}}^{\ast}(w)P(w)^{\prime}\alpha\right\}
\right\vert =O_{p}((L+K)^{1/2}\xi_{0,K}(\log(n))^{1/2}n^{-1/2}).
\label{AL-E6-8}%
\end{equation}
By the definition of $\mathcal{N}_{g,n}$ and (\ref{AL-E6-8}),
\begin{align}
& \sup_{h\in\mathcal{N}_{h,n},g\in\mathcal{N}_{g,n}}\left\vert \mu
_{n}\left\{ \tau(w_{h})(g_{o,K}(w)-g(w))u_{g_{n}}^{\ast}(w)\right\}
\right\vert \nonumber\\
& \leq\sup_{h\in\mathcal{N}_{h,n},\alpha\in\mathbb{S}^{K-1}}\left\vert
\mu_{n}\left\{ \tau(w_{h})u_{g_{n}}^{\ast}(w)P(w)^{\prime}\alpha\right\}
\right\vert \sup_{\beta\in\mathcal{N}_{\beta,n}}\left\Vert \beta-\beta
_{o,K}\right\Vert \nonumber\\
& =O_{p}(\delta_{g,n}(L+K)^{1/2}\xi_{0,K}(\log(n))^{1/2}n^{-1/2}%
)=o_{p}(n^{-1/2}) \label{AL-E6-9}%
\end{align}
where the second equality is by Assumption \ref{AA-E-7}. Collecting the
results in (\ref{AL-E6-1}), (\ref{AL-E6-5}) and (\ref{AL-E6-9}), and applying
the triangle inequality, we immediately prove the asserted result.
\end{proof}
\section{Extra Simulation Results}
In this section, we study the finite sample performance of the two-step
nonparametric M estimator and the proposed inference method when the
nonparametric regressor may have unbounded support.\ The simulated data is
from the following model%
\begin{align}
y_{i} & =w_{1,i}\theta_{o}+m_{o}(h_{o}(x_{i}))+u_{i},\label{MC-1}\\
s_{i} & =h_{o}(x_{i})+\varepsilon_{i}, \label{MC-2}%
\end{align}
where $\theta_{o}=1$; $h_{o}(x)=2\cos(\pi x)$, $m_{o}(w_{2})=\sin(\pi w_{2})$
and $w_{2}=h_{o}(x)$.\ For $i=1,\ldots,n$, we independently draw
$(w_{1,i},x_{\ast,i},u_{i},\varepsilon_{i})^{\prime}$ from $N(0,I_{4})$ and
then calculate
\begin{equation}
x_{i}=2^{-1/2}(w_{1,i}+x_{\ast,i}). \label{MC-3A}%
\end{equation}
The data $\left\{ y_{i},s_{i},w_{1,i},x_{i}\right\} _{i=1}^{n}$ are
generated using the equations in (\ref{MC-1}) and (\ref{MC-2}).
The first-step and second-step nonparametric estimators and the consistent
variance estimator take the same forms as their counterparts in Section 7 of
HLR and hence are omitted here. We consider sample sizes $n=100$, $250$ and
$500$ in this simulation study. For each sample size, we generate 10000
simulated samples to evaluate the performances of the two-step sieve estimator
and the proposed inference procedure. For each simulated sample, we calculate
the sieve estimator of $(\theta_{o},m_{o})$, and the 0.90 confidence interval
of $\theta_{o}$ for each combination of $(L,K)$ where $L=2,\ldots,16$ and
$K=2,\ldots,21$. The simulation results are reported in Figures 4.1 and 4.2.%
%TCIMACRO{\TeXButton{B}{\begin{figure}[tbp] \centering}}%
%BeginExpansion
\begin{figure}[tbp] \centering
%EndExpansion
$%
\begin{array}
[c]{c}%
\text{Figure 4.1. The Mean Squared Errors of the Two-step Sieve M Estimators
of }m_{o}\text{ and }\theta_{o}\text{ (DGP2)}\\
\\%
%TCIMACRO{\FRAME{itbpF}{5.7588in}{7.0621in}{0in}{}{}{dgp2_{m}se.png}%
%{\special{ language "Scientific Word"; type "GRAPHIC"; display "USEDEF";
%valid_file "F"; width 5.7588in; height 7.0621in; depth 0in;
%original-width 8.0004in; original-height 9.6997in; cropleft "0";
%croptop "1"; cropright "1"; cropbottom "0";
%filename 'graphics/DGP2_MSE.png';file-properties "XNPEU";}} }%
%BeginExpansion
{\includegraphics[
natheight=9.699700in,
natwidth=8.000400in,
height=7.0621in,
width=5.7588in
]%
{graphics/DGP2_MSE.png}%
}
%EndExpansion
\end{array}
$%
%TCIMACRO{\TeXButton{caption}{\caption{\small
%{1. The left panel represents the MSEs of the two-step sieve estimator of $m_{o}%
%$ for sample sizes n=100, 250 and 500 respectively; 2. the right panel represents the MSEs of the two-step sieve estimator of $\theta
%_{o}$ for sample sizes n=100, 250 and 500 respectively; 3. $L^{*}%
%$ and $K^{*}%
%$ denote the numbers of the series terms which produce sieve estimator of $m_{o}%
%$ with the smallest finite sample MSE (in the left panel) or sieve estimator of $\theta
%_{o}%
%$ with the smallest finite sample MSE (in the right panel); 4. the dotted line represents the MSE of the two-step sieve M estimator with $L=L^{*}%
%$ and $K=K^{*}%
%$; 5. the solid line represents the MSE of the two-step sieve M estimator with $L$ and $K$ selected by 5-fold cross-validation.}%
%}}}%
%BeginExpansion
\caption{\small
{1. The left panel represents the MSEs of the two-step sieve estimator of $m_{o}%
$ for sample sizes n=100, 250 and 500 respectively; 2. the right panel represents the MSEs of the two-step sieve estimator of $\theta
_{o}$ for sample sizes n=100, 250 and 500 respectively; 3. $L^{*}%
$ and $K^{*}%
$ denote the numbers of the series terms which produce sieve estimator of $m_{o}%
$ with the smallest finite sample MSE (in the left panel) or sieve estimator of $\theta
_{o}%
$ with the smallest finite sample MSE (in the right panel); 4. the dotted line represents the MSE of the two-step sieve M estimator with $L=L^{*}%
$ and $K=K^{*}%
$; 5. the solid line represents the MSE of the two-step sieve M estimator with $L$ and $K$ selected by 5-fold cross-validation.}%
}%
%EndExpansion%
%TCIMACRO{\TeXButton{E}{\end{figure}}}%
%BeginExpansion
\end{figure}%
%EndExpansion
%
%TCIMACRO{\TeXButton{B}{\begin{figure}[tbp] \centering}}%
%BeginExpansion
\begin{figure}[tbp] \centering
%EndExpansion
$%
\begin{array}
[c]{c}%
\text{Figure 4.2. The Coverage Probability and the Average Length of the
Confidence Interval of }\theta_{o}\text{ (DGP2)}\\
\\%
%TCIMACRO{\FRAME{itbpF}{5.7769in}{7.0621in}{0in}{}{}{dgp2_{c}i.png}%
%{\special{ language "Scientific Word"; type "GRAPHIC"; display "USEDEF";
%valid_file "F"; width 5.7769in; height 7.0621in; depth 0in;
%original-width 8.1993in; original-height 9.6997in; cropleft "0";
%croptop "1"; cropright "1"; cropbottom "0";
%filename 'graphics/DGP2_CI.png';file-properties "XNPEU";}} }%
%BeginExpansion
{\includegraphics[
natheight=9.699700in,
natwidth=8.199300in,
height=7.0621in,
width=5.7769in
]%
{graphics/DGP2_CI.png}%
}
%EndExpansion
\end{array}
$%
%TCIMACRO{\TeXButton{caption}{\caption{\small
%{1. The left panel presents the coverage probability of the confidence interval of $\theta
%_{o}%
%$ for sample sizes n=100, 250 and 500 respectively; 2. the right panel presents the average length of the confidence interval of $\theta
%_{o}%
%$ for sample sizes n=100, 250 and 500 respectively; 3. the dotted line in the left panel is the 0.90 line which represents the nominal coverage of the confidence interval; 4. the solid line represents the coverage probability of the confidence interval based on the two-step sieve estimator with $K$ and $L$ selected by 5-fold cross-validation.}%
%}}}%
%BeginExpansion
\caption{\small
{1. The left panel presents the coverage probability of the confidence interval of $\theta
_{o}%
$ for sample sizes n=100, 250 and 500 respectively; 2. the right panel presents the average length of the confidence interval of $\theta
_{o}%
$ for sample sizes n=100, 250 and 500 respectively; 3. the dotted line in the left panel is the 0.90 line which represents the nominal coverage of the confidence interval; 4. the solid line represents the coverage probability of the confidence interval based on the two-step sieve estimator with $K$ and $L$ selected by 5-fold cross-validation.}%
}%
%EndExpansion%
%TCIMACRO{\TeXButton{E}{\end{figure}}}%
%BeginExpansion
\end{figure}%
%EndExpansion
The properties of the two-step sieve M estimator and the proposed confidence
interval are similar to what we found in the other DGP employed in HLR. We
list some important differences. First, when the unknown function estimated in
the first-step has unbounded support, the optimal $L$ which produces a
two-step M estimator with the smallest MSE is much larger. Second, the ratio
between the MSE of the cross-validated estimator of $m_{o}$ and the optimal
MSE does not seem to converge to 1 in all the sample sizes we considered.
However, the MSE of the cross-validated estimator of $\theta_{o}$ does
approach the optimal value quickly as the sample size increases. Third, when
$L$ is small (e.g., $L=4$), the proposed confidence interval over-covers the
unknown parameter $\theta_{o}$ and its length diverges with increasing $K$.
Fourth, the coverage probability of the confidence interval based on the
cross-validated sieve estimator is almost identical to the nominal level even
when the sample size is small (e.g., $n=100$).
\section{Consistency and Convergence Rate\label{CR}}
In this appendix, we first derive the consistency of the second-step sieve M
estimator\textbf{ }$\widehat{g}_{n}$\textbf{ }under the metric\textbf{
}$\left\Vert \cdot\right\Vert _{\mathcal{G}}$ defined on $\mathcal{G}$. Given
the consistency, we then focus on a local neighborhood of $g_{o}$ to calculate
the convergence rate of $\widehat{g}_{n}$. Under mild conditions, the
first-step sieve M estimator $\widehat{h}_{n}$ is consistent (see, e.g.,
Theorem 3.1 of Chen, 2007), and also has a rate of convergence under a
pseudo-metric $\left\Vert \cdot\right\Vert _{\mathcal{H}}$.\footnote{See,
e.g., Shen and Wong (1994) and Chen and Shen (1998) for the convergence rate
of the one-step (approximate) sieve M estimator for i.i.d. data and weakly
dependent data respectively.} Let $\delta_{h,n}^{\ast}=O(\varepsilon_{1,n})$
be a small positive number that goes to zero as $n\rightarrow\infty$. Without
loss of generality we denote $||\widehat{h}_{n}-h_{o}||_{\mathcal{H}}%
=O_{p}(\delta_{h,n}^{\ast})$ as the convergence rate. Hence we can assume that
$\widehat{h}_{n}$ belongs to a shrinking neighborhood\textbf{ }$\mathcal{N}%
_{h,n}=\{h\in\mathcal{H}_{n}:\left\Vert h-h_{o}\right\Vert _{\mathcal{H}}%
\leq\delta_{h,n}\}$\textbf{ }of\textbf{ }$h_{o}$\textbf{ }wpa1, where\textbf{
}$\delta_{h,n}=\delta_{h,n}^{\ast}\log(\log(n))=o(1)$.
\subsection{Consistency of the second step sieve M estimation}
The following conditions are sufficient for the consistency of\textbf{
}$\widehat{g}_{n}$\textbf{ }under $\left\Vert \cdot\right\Vert _{\mathcal{G}}$.
\begin{assumption}
\label{C1} (i)\ $\mathbb{E}\left[ \psi\left( Z_{2},g_{o},h_{o}\right)
\right] >-\infty$ and if $\mathbb{E}\left[ \psi\left( Z_{2},g_{o}%
,h_{o}\right) \right] =\infty$, then $\mathbb{E}\left[ \psi\left(
Z_{2},g,h_{o}\right) \right] <\infty$ for all $g\in\mathcal{G}_{n}%
\backslash\left\{ g_{o}\right\} $ and for all $n\geq1$; (ii) for all
$\varepsilon>0$, there exists some non-increasing positive sequence
$c_{n}(\varepsilon)$ such that for all $n\geq1$
\begin{equation}
\mathbb{E}\left[ \psi\left( Z_{2},g_{o},h_{o}\right) \right]
-\sup_{\left\{ g\in\mathcal{G}_{n}:\text{ }||g-g_{o}||_{\mathcal{G}}%
\geq\varepsilon\right\} }\mathbb{E}\left[ \psi\left( Z_{2},g,h_{o}\right)
\right] \geq c_{n}(\varepsilon) \label{ID}%
\end{equation}
and $\lim\inf_{n}c_{n}(\varepsilon)>0$ for all $\varepsilon>0$.
\end{assumption}
Assumption \ref{C1} is the identification uniqueness condition for $g_{o}$.
For sieve M estimation a similar condition can be found in White and
Wooldridge (1991). This assumption is stronger than Condition 3.1 of Theorem
3.1 in Chen (2007) and Condition a of Lemma A.2 in Chen and Pouzo (2012),
because it requires $c_{n}(\varepsilon)$ to be bounded away from zero for all
large $n$. It essentially requires that the second step sieve M estimation is
well-posed under the strong metric $\left\Vert \cdot\right\Vert _{\mathcal{G}%
}$.
\begin{assumption}
\label{C2} (i) $g_{o}\in\mathcal{G}\ $and $\left\Vert \cdot\right\Vert
_{\mathcal{G}}$ is a metric defined on $\mathcal{G}$ or some metric space
containing $\mathcal{G}$; (ii) $\mathcal{G}_{n}\subset\mathcal{G}_{n+1}%
\subset\mathcal{G}$ for all $n\geq1$ and there exists some $g_{n}%
\in\mathcal{G}_{n}$ such that
\begin{equation}
\left\vert \mathbb{E}\left[ \psi(Z_{2},g_{n},h_{o})-\psi(Z_{2},g_{o}%
,h_{o})\right] \right\vert =O(\eta_{2,n}) \label{SIA}%
\end{equation}
where $\eta_{2,n}\ $is some finite positive non-increasing sequence.
\end{assumption}
Assumption \ref{C2} imposes conditions on the sieve spaces. It is essentially
Condition b of Lemma A.2 in Chen and Pouzo (2012). It is also implied by
Conditions 3.2 and 3.3 of Theorem 3.1 in Chen (2007). The condition in
(\ref{SIA}) is clearly implied by the convergence rate of the sieve
approximation error of $\left\Vert g_{n}-g_{o}\right\Vert _{s,2}$ and the
continuity of the criterion function $\mathbb{E}\left[ \psi\left(
Z_{2},g,h_{o}\right) \right] $ for all $g\in\mathcal{G}_{n}$ in the local
neighborhood of $g_{o}$. In the following we denote $\mu_{n}\left[
\psi\left( Z_{2},g,h\right) \right] \equiv\frac{1}{n}\sum_{i=1}^{n}\left\{
\psi\left( Z_{2,i},g,h\right) -\mathbb{E}\left[ \psi\left( Z_{2}%
,g,h\right) \right] \right\} $.
\begin{assumption}
\label{C3} (i) $\sup_{g\in\mathcal{G}_{n},h\in\mathcal{N}_{h,n}}\left\vert
\mu_{n}\left[ \psi(Z_{2},g,h)\right] \right\vert =O_{p}(\eta_{0,n})$ where
$\left\{ \eta_{0,n}\right\} \ $is some finite positive non-increasing
sequence going to zero; (ii) there is a finite positive non-increasing
sequence $\left\{ \eta_{1,n}\right\} $ going to zero such that%
\[
\sup_{g\in\mathcal{G}_{n},h\in\mathcal{N}_{h,n}}\left\vert \mathbb{E}\left[
\psi(Z_{2},g,h)-\psi\left( Z_{2},g,h_{o}\right) \right] \right\vert
=O(\eta_{1,n}).
\]
\end{assumption}
Assumption \ref{C3} is similar to Condition 3.5 of Theorem 3.1 in Chen (2007)
and the first part of Condition d of Lemma A.2 in Chen and Pouzo (2012).
Assumption \ref{C3}.(i) can be verified by applying a standard empirical
process result. Assumption \ref{C3}.(ii)\ can be verified by the convergence
rate of the first-step sieve M estimator $\widehat{h}_{n}$ and the continuity
of the criterion function $\mathbb{E}\left[ \psi\left( Z_{2},g,h\right)
\right] $ in $h\in\mathcal{N}_{h,n}$ uniformly over $g\in\mathcal{G}_{n}$.
\begin{theorem}
\label{CST} Let Assumptions \ref{C1}, \ref{C2} and \ref{C3} hold. If%
\begin{equation}
\max\left\{ \eta_{0,n},\eta_{1,n},\eta_{2,n},\varepsilon_{2,n}^{2}\right\}
=o(1) \label{RT}%
\end{equation}
then the second-step sieve M estimator is consistent under $\left\Vert
\cdot\right\Vert _{\mathcal{G}}$, i.e. $\left\Vert \widehat{g}_{n}%
-g_{o}\right\Vert _{\mathcal{G}}=o_{p}(1)$.
\end{theorem}
\begin{proof}
[Proof of Theorem \ref{CST}]Let $Q_{n}\left( g,h\right) \equiv\frac{1}%
{n}\sum\nolimits_{i=1}^{n}\psi\left( Z_{2,i},g,h\right) $ and $Q\left(
g,h\right) \equiv\mathbb{E}\left[ \psi\left( Z_{2},g,h\right) \right] $.
Let $I_{n}(\varepsilon)\equiv\Pr\left( \left\Vert \widehat{g}_{n}%
-g_{o}\right\Vert _{\mathcal{G}}>\varepsilon\right) $. For any $\varepsilon
>0$, by the definition of $\widehat{g}_{n}$, we have%
\begin{equation}
I_{n}(\varepsilon)\leq\Pr\left( \sup_{\left\{ g\in\mathcal{G}_{n}:\text{
}||g-g_{o}||_{\mathcal{G}}\geq\varepsilon\right\} }Q_{n}(g,\widehat{h}%
_{n})\geq Q_{n}(g_{n},\widehat{h}_{n})-O_{p}\left( \varepsilon_{2,n}%
^{2}\right) \right) . \label{P-THM1-0}%
\end{equation}
Rewrite the inequality inside the parentheses on the RHS as
\begin{equation}
-\left[ Q_{n}(g_{n},\widehat{h}_{n})-Q\left( g_{o},h_{o}\right) \right]
+O_{p}\left( \varepsilon_{2,n}^{2}\right) \geq Q\left( g_{o},h_{o}\right)
-\sup_{\left\{ g\in\mathcal{G}_{n}:\text{ }||g-g_{o}||_{\mathcal{G}}%
\geq\varepsilon\right\} }Q_{n}(g,\widehat{h}_{n}). \label{P-THM1-1}%
\end{equation}
Note that the first two terms on the LHS of the above inequality can be
rewritten as
\begin{align*}
& -\left[ Q_{n}(g_{n},\widehat{h}_{n})-Q\left( g_{o},h_{o}\right) \right]
\\
& =-\mu_{n}\left[ \psi(Z_{2},g_{n},\widehat{h}_{n})\right] -\left[
Q(g_{n},\widehat{h}_{n})-Q\left( g_{n},h_{o}\right) \right] -\left[
Q\left( g_{n},h_{o}\right) -Q\left( g_{o},h_{o}\right) \right]
\end{align*}
which implies that if $\widehat{h}_{n}\in\mathcal{N}_{h,n}$ with probability
approaching 1 (wpa1), then
\begin{equation}
-\left[ Q_{n}(g_{n},\widehat{h}_{n})-Q\left( g_{o},h_{o}\right) \right]
\leq I_{1,n}+I_{2,n}+I_{3,n}, \label{P-THM1-2}%
\end{equation}
where%
\begin{align*}
I_{1,n} & \equiv\sup_{g\in\mathcal{G}_{n},h\in\mathcal{N}_{h,n}}\left\vert
\mu_{n}\left[ \psi\left( Z_{2},g,h\right) \right] \right\vert ,\\
I_{2,n} & \equiv\sup_{g\in\mathcal{G}_{n},h\in\mathcal{N}_{h,n}}\left\vert
Q\left( g,h\right) -Q\left( g,h_{o}\right) \right\vert ,\\
I_{3,n} & \equiv\left\vert Q(g_{n},h_{o})-Q(g_{o},h_{o})\right\vert .
\end{align*}
Similarly if $\widehat{h}_{n}\in\mathcal{N}_{h,n}$ wpa1, then for any
$g\in\mathcal{G}_{n}$,
\begin{align}
Q_{n}(g,\widehat{h}_{n}) & =\mu_{n}\left[ \psi(Z_{2},g,\widehat{h}%
_{n})\right] +\left[ Q(g,\widehat{h}_{n})-Q(g,h_{o})\right] +Q(g,h_{o}%
)\nonumber\\
& \leq\sup_{g\in\mathcal{G}_{n},h\in\mathcal{N}_{h,n}}\left\vert \mu
_{n}\left[ \psi\left( Z_{2},g,h\right) \right] \right\vert +\sup
_{g\in\mathcal{G}_{n},h\in\mathcal{N}_{h,n}}\left\vert Q(g,h)-Q(g,h_{o}%
)\right\vert +Q(g,h_{o})\nonumber\\
& =I_{1,n}+I_{2,n}+Q(g,h_{o}). \label{P-THM1-3}%
\end{align}
Therefore when $\widehat{h}_{n}\in\mathcal{N}_{h,n}$ wpa1, we may note that
the term on the RHS of (\ref{P-THM1-1}) is such that
\begin{align}
& Q(g_{o},h_{o})-\sup_{\left\{ g\in\mathcal{G}_{n}:\text{ }||g-g_{o}%
||_{\mathcal{G}}\geq\varepsilon\right\} }Q_{n}(g,\widehat{h}_{n})\nonumber\\
& \geq-I_{1,n}-I_{2,n}+Q(g_{o},h_{o})-\sup_{\left\{ g\in\mathcal{G}%
_{n}:\text{ }||g-g_{o}||_{\mathcal{G}}\geq\varepsilon\right\} }Q(g,h_{o}).
\label{P-THM1-4}%
\end{align}
From (\ref{P-THM1-0}), (\ref{P-THM1-1}), (\ref{P-THM1-2}) and (\ref{P-THM1-4}%
), we get%
\begin{equation}
I_{n}(\varepsilon)\leq\Pr\left( 2\sum\limits_{j=1}^{3}I_{j,n}+O_{p}%
(\varepsilon_{2,n}^{2})\geq Q(g_{o},h_{o})-\sup_{\left\{ g\in\mathcal{G}%
_{n}:\text{ }||g-g_{o}||_{\mathcal{G}}\geq\varepsilon\right\} }%
Q(g,h_{o})\right) +\Pr\left( \widehat{h}_{n}\notin\mathcal{N}_{h,n}\right)
. \label{P-THM1-5}%
\end{equation}
If $Q(g_{o},h_{o})=\infty$, then using Assumption \ref{C1}.(i), we
have\textbf{ }%
\begin{equation}
Q(g_{o},h_{o})-\sup_{\left\{ g\in\mathcal{G}_{n}:\text{ }||g-g_{o}%
||_{\mathcal{G}}\geq\varepsilon\right\} }Q(g,h_{o})=\infty. \label{P-THM1-6}%
\end{equation}
However, from Assumptions \ref{C2}.(ii) and \ref{C3}, we get\textbf{ }%
$\max\{I_{1,n},I_{2,n},I_{3,n}\}=O_{p}(1)$\textbf{, }which together with
(\ref{P-THM1-5}), (\ref{P-THM1-6}), $\varepsilon_{2,n}=o(1)$ and the
definition of\textbf{ }$\mathcal{N}_{h,n}$\textbf{ }implies that%
\[
I_{n}(\varepsilon)\leq\Pr\left( \widehat{h}_{n}\notin\mathcal{N}%
_{h,n}\right) \rightarrow0\text{ as }n\rightarrow\infty\text{.}%
\]
\textbf{ }On the other hand, if $Q(g_{o},h_{o})<\infty$, then using
(\ref{P-THM1-5}) and Assumption \ref{C1}.(ii), we get
\begin{equation}
I_{n}(\varepsilon)\leq\Pr\left( \frac{2I_{1,n}+2I_{2,n}+2I_{3,n}%
+O_{p}(\varepsilon_{2,n}^{2})}{c_{n}(\varepsilon)}\geq1\right) +\Pr\left(
\widehat{h}_{n}\notin\mathcal{N}_{h,n}\right) . \label{P-THM1-7}%
\end{equation}
Assumption \ref{C1}.(ii), Assumption \ref{C2}.(ii), Assumption \ref{C3} and
the condition (\ref{RT}) imply that%
\[
\frac{2I_{1,n}+2I_{2,n}+2I_{3,n}+O_{p}(\varepsilon_{2,n}^{2})}{c_{n}%
(\varepsilon)}=o_{p}(1)
\]
for any $\varepsilon>0$. Combining this result with (\ref{P-THM1-7}) and the
definition of $\mathcal{N}_{h,n}$, we conclude that $I_{n}(\varepsilon
)\rightarrow0$ as $n$ goes to infinity. This finishes the proof.
\end{proof}
\subsection{Rate of convergence of the second step sieve M estimation}
After the consistency of the second-step sieve M estimator $\widehat{g}_{n}$
is established, we can focus on the local neighborhood of $g_{o}$ to compute
the convergence rate of $\widehat{g}_{n}$ under $\left\Vert \cdot\right\Vert
_{\mathcal{G}}$. Let $K_{2}$ be a generic finite and positive constant and
define%
\[
\mathcal{N}_{2,K_{2}}\equiv\left\{ g\in\mathcal{G}_{n}:||g-g_{o}%
||_{\mathcal{G}}\leq K_{2}\right\} ,
\]
then by the consistency of $\widehat{g}_{n}$, we have $\widehat{g}_{n}%
\in\mathcal{N}_{2,K_{2}}$ wpa1. Moreover, given the convergence rate
$\delta_{h,n}^{\ast}$ of the first-step sieve M estimator $\widehat{h}_{n}$,
we can define
\[
\mathcal{N}_{1,K_{1}}\equiv\left\{ h\in\mathcal{H}_{n}:||h-h_{o}%
||_{\mathcal{H}}/\delta_{h,n}^{\ast}\leq K_{1}\right\}
\]
such that for any small constant $\omega>0$, there is a finite constant
$K_{\omega}>0$ such that
\begin{equation}
\Pr(\widehat{h}_{n}\notin\mathcal{N}_{1,K_{\omega}})\leq\omega\text{ for all
}n\text{.} \label{TIG}%
\end{equation}
The following general conditions are sufficient for deriving the convergence
rate of $\widehat{g}_{n}$.
\begin{assumption}
\label{R1} There are some finite, positive and non-increasing sequences
$\delta_{1,n}$, $\delta_{2,n}$ and $\delta_{n}$ that go to zero as
$n\rightarrow\infty$ such that the following hold for any fixed finite
constants $K_{1}>0,$ $K_{2}>0$:\ (i)%
\begin{equation}
\sup_{h\in\mathcal{N}_{1,K_{1}}}\left\vert \mathbb{E}\left[ \psi(Z_{2}%
,g_{n},h)-\psi(Z_{2},g_{o},h)\right] \right\vert =O(\delta_{2,n}^{2});
\label{AR1-0}%
\end{equation}
(ii)\ for any small constants $\delta,\widetilde{\delta}>0$ and for any
$g\in\mathcal{N}_{2,K_{2}}$ with $0<\widetilde{\delta}<\left\Vert
g-g_{o}\right\Vert _{\mathcal{G}}<\delta$
\begin{equation}
\sup_{h\in\mathcal{N}_{1,K_{1}}}\mathbb{E}\left[ \psi\left( Z_{2}%
,g,h\right) -\psi\left( Z_{2},g_{o},h\right) \right] \leq c_{K_{1}%
,1}\delta_{1,n}\delta-c_{K_{1},2}\delta^{2}, \label{AR1-1}%
\end{equation}
where $c_{K_{1},1}\ $and $c_{K_{1},2}>0$ are finite constants only depending
on $K_{1}$; (iii)%
\begin{equation}
\sup_{g\in\mathcal{N}_{2,K_{2}},h\in\mathcal{N}_{1,K_{1}}}\left\vert \mu
_{n}\left[ \psi(Z,g,h)-\psi(Z,g,h_{o})\right] \right\vert =O_{p}(\delta
_{n}^{2}); \label{AR1-2}%
\end{equation}
(iv) for all $n$ large enough and for any sufficiently small $\delta$,
\begin{equation}
\mathbb{E}\left[ \sup_{\left\{ g\in\mathcal{N}_{2,K_{2}}:\text{ }\left\Vert
g-g_{o}\right\Vert _{\mathcal{G}}\leq\delta\right\} }\left\vert \mu
_{n}\left[ \psi(Z,g,h_{o})-\psi(Z,g_{o},h_{o})\right] \right\vert \right]
\leq\frac{c_{1}\phi_{n}(\delta)}{\sqrt{n}} \label{AR1-4}%
\end{equation}
where $c_{1}>0$ is some finite constant and $\phi_{n}(\cdot)$ is some function
such that $\delta^{-\gamma}\phi_{n}(\delta)$ is a decreasing function for some
$\gamma\in(0,2)$.
\end{assumption}
Assumption \ref{R1}.(i) imposes a local smoothness condition on the function
$\mathbb{E}\left[ \psi\left( Z_{2},\cdot,h\right) \right] $ uniformly over
$h$ in some shrinking neighborhood. The rate $\delta_{2,n}$ is determined by
the convergence rates of the sieve approximation error of $g_{o}$ and the
first step sieve estimator $\widehat{h}_{n}$. Assumption \ref{R1}.(ii) is a
local identification condition. The term $\delta_{1,n}$ on the right side of
the inequality (\ref{AR1-1}) represents the effect of first-step estimation on
the second-step sieve estimate $\widehat{g}_{n}$. In Assumption \ref{R1}.(i),
(ii) and (iii), the uniform convergence is imposed over local neighborhoods
$\mathcal{N}_{1,K_{1}}$ and/or $\mathcal{N}_{2,K_{2}}$. That is particularly
useful for establishing the convergence rate of $\widehat{g}_{n}$, because by
the consistency of $\widehat{g}_{n}$ and the convergence rate of $\widehat
{h}_{n}$, we can bound the probabilities of the events $\{ \widehat{g}%
_{n}\notin\mathcal{N}_{2,K_{2}}\}$ and $\{ \widehat{h}_{n}\notin
\mathcal{N}_{1,K_{1}}\}$ in finite samples by choosing sufficiently large
$K_{1}$ and $K_{2}$. Assumption \ref{R1}.(iv) is a stochastic equicontinuity
condition which is similar to the one in Theorem 3.4.1 of Van der Vaart and
Wellner (1996).
\begin{theorem}
\label{T-rate} Suppose that the conditions in Theorem \ref{CST} and Assumption
\ref{R1} are satisfied. Furthermore, if $\left\Vert g_{n}-g_{o}\right\Vert
_{\mathcal{G}}=O(\delta_{2,n}^{\ast})$\ where $\delta_{2,n}^{\ast}$ is defined
below and there is a finite, positive and non-increasing sequence
$\delta_{g,n}$ such that
\begin{equation}
\left( \delta_{g,n}\right) ^{-2}\phi_{n}(\delta_{g,n})\leq c_{2}\sqrt{n},
\label{DR1}%
\end{equation}
then we have $\left\Vert \widehat{g}_{n}-g_{o}\right\Vert _{\mathcal{G}}%
=O_{p}\left( \delta_{2,n}^{\ast}\right) $, where $\delta_{2,n}^{\ast}%
\equiv\max\left\{ \delta_{1,n},\delta_{2,n},\delta_{n},\delta_{g,n}%
,\varepsilon_{2,n}\right\} $.
\end{theorem}
\begin{proof}
[Proof of Theorem \ref{T-rate}]Let $\omega>0$ be some arbitrarily small
constant. Because $\widehat{g}_{n}$ is consistent, we can choose a
sufficiently large constant $K_{M}>0$ such that%
\begin{equation}
\Pr\left( ||\widehat{g}_{n}-g_{o}||_{\mathcal{G}}>K_{M}\right) \leq\omega.
\label{P-RT-1a}%
\end{equation}
By $\left\Vert g_{n}-g_{o}\right\Vert _{\mathcal{G}}=o(1)$, we deduce that
there is some sufficiently large $K_{g_{o}}$ such that $||g_{n}-g_{o}%
||_{\mathcal{G}}\leq K_{g_{o}}$. Let $K_{M}^{\ast}=\max\{K_{M},K_{g_{o}}\}$,%
\[
\mathcal{G}_{n}(M)\equiv\left\{ g\in\mathcal{G}_{n}:2^{M}\delta_{2,n}^{\ast
}<||g-g_{o}||_{\mathcal{G}}\leq K_{M}^{\ast}\right\}
\]
and $I_{M,n}\left( \omega\right) \equiv\Pr\left( ||\widehat{g}_{n}%
-g_{o}||_{\mathcal{G}}>2^{M}\delta_{2,n}^{\ast}\right) $. Note that by
(\ref{P-RT-1a}), we have
\begin{equation}
I_{M,n}\left( \omega\right) =\Pr\left( \widehat{g}_{n}\in\mathcal{G}%
_{n}\left( M\right) \right) +\Pr\left( ||\widehat{g}_{n}-g_{o}%
||_{\mathcal{G}}>K_{M}^{\ast}\right) \leq\Pr\left( \widehat{g}_{n}%
\in\mathcal{G}_{n}\left( M\right) \right) +\omega. \label{P-RT-1b}%
\end{equation}
We will prove that%
\begin{equation}
I_{M,n}\left( \omega\right) \leq\sum_{j\geq M,2^{j-1}\delta_{2,n}^{\ast}\leq
K_{M}^{\ast}}\frac{c_{1}c_{2}\left[ (2^{j+1})^{\gamma}+K_{\varepsilon
}^{\gamma}\right] }{\left\vert c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}%
\right\vert }+5\omega\label{P-RT-1c}%
\end{equation}
where $c_{1}$ and $c_{2}$ are defined in Assumption \ref{R1}.(iv) and
(\ref{DR1}), $c_{K_{1},1}$, $c_{K_{1},2}$, $K_{\varepsilon}$ and $K$ are some
fixed finite constants which may depend on $\omega$, and $\gamma\in(0,2)$ is
defined in Assumption \ref{R1}.(iv). As $\gamma<2$, we can choose $M$
sufficiently large such that%
\[
\sum_{j\geq M,2^{j-1}\delta_{2,n}^{\ast}\leq K_{M}^{\ast}}\frac{c_{1}%
c_{2}\left[ (2^{j+1})^{\gamma}+K_{\varepsilon}^{\gamma}\right] }{\left\vert
c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}\right\vert }<\omega,
\]
which together with (\ref{P-RT-1c}) implies that $I_{M,n}(\omega)\leq6\omega$.
As $\omega$ can be made arbitrarily small, this establishes that
$||\widehat{g}_{n}-g_{o}||_{\mathcal{G}}=O_{p}\left( \delta_{2,n}^{\ast
}\right) $. Equation (\ref{P-RT-1c}) is established by combining
(\ref{P-RT-2}), (\ref{P-RT-3}) and (\ref{P-RT-4}) below, which are proved in
several steps.
\noindent\underline{Step 1}:\ We prove that%
\begin{equation}
I_{M,n}(\omega)\leq\Pr\left( \sup_{g\in\mathcal{G}_{n}(M),h\in\mathcal{N}%
_{1,K_{1}}}\left[ I_{1,n}(g,h_{o})+I_{2,n}(g,h)\right] +K\delta_{2,n}%
^{\ast2}\geq0\right) +5\omega\label{P-RT-2}%
\end{equation}
where\ $K_{1}$ is a fixed constant such that $\Pr\left( \widehat{h}_{n}%
\notin\mathcal{N}_{1,K_{1}}\right) \leq\omega$ for all $n$,\ $K$ is some
fixed constant defined below,
\begin{align*}
I_{1,n}\left( g,h_{o}\right) & \equiv\mu_{n}\left[ \psi(Z_{2}%
,g,h_{o})-\psi(Z_{2},g_{n},h_{o})\right] ,\\
\text{and }I_{2,n}\left( g,h\right) & \equiv Q(g,h)-Q(g_{o},h).
\end{align*}
For this purpose, we first note that by the definition of $\widehat{g}_{n}$,
we can choose some sufficiently large constant $K_{1}>0$ such that
\begin{equation}
\Pr\left( Q_{n}(\widehat{g}_{n},\widehat{h}_{n})-Q_{n}(g_{n},\widehat{h}%
_{n})+K_{1}\varepsilon_{2,n}^{2}<0\right) \leq\omega. \label{P-RT-2a}%
\end{equation}
Combining (\ref{P-RT-1b}) and (\ref{P-RT-2a}), we have
\begin{equation}
I_{M,n}\left( \omega\right) \leq\Pr\left( \sup_{g\in\mathcal{G}_{n}%
(M)}Q_{n}(g,\widehat{h}_{n})-Q_{n}(g_{n},\widehat{h}_{n})+K_{1}\varepsilon
_{2,n}^{2}\geq0\right) +2\omega. \label{P-RT-2b}%
\end{equation}
It is clear that the term inside the parentheses on the RHS of (\ref{P-RT-2b})
is such that%
\begin{align*}
& Q_{n}(g,\widehat{h}_{n})-Q_{n}(g_{n},\widehat{h}_{n})\\
& =\mu_{n}\left[ \psi(Z_{2},g,\widehat{h}_{n})-\psi(Z_{2},g_{n},\widehat
{h}_{n})\right] +Q(g,\widehat{h}_{n})-Q(g_{n},\widehat{h}_{n})\\
& =\mu_{n}\left[ \psi(Z_{2},g,\widehat{h}_{n})-\psi\left( Z_{2}%
,g,h_{o}\right) \right] +\mu_{n}\left[ \psi\left( Z_{2},g_{n}%
,h_{o}\right) -\psi(Z_{2},g_{n},\widehat{h}_{n})\right] \\
& +\mu_{n}\left[ \psi\left( Z_{2},g,h_{o}\right) -\psi\left( Z_{2}%
,g_{n},h_{o}\right) \right] +Q(g,\widehat{h}_{n})-Q(g_{o},\widehat{h}_{n})\\
& +Q(g_{o},\widehat{h}_{n})-Q(g_{n},\widehat{h}_{n}),
\end{align*}
and therefore,%
\begin{align}
& Q_{n}(g,\widehat{h}_{n})-Q_{n}(g_{n},\widehat{h}_{n})\nonumber\\
& =\mu_{n}\left[ \psi(Z_{2},g,\widehat{h}_{n})-\psi\left( Z_{2}%
,g,h_{o}\right) \right] +\mu_{n}\left[ \psi\left( Z_{2},g_{n}%
,h_{o}\right) -\psi(Z_{2},g_{n},\widehat{h}_{n})\right] \nonumber\\
& +Q(g_{o},\widehat{h}_{n})-Q(g_{n},\widehat{h}_{n})+I_{1,n}\left(
g,h_{o}\right) +I_{2,n}(g,\widehat{h}_{n}). \label{P-RT-2c}%
\end{align}
From Assumption \ref{R1}.(iii), we can choose some constant $K_{2}$
sufficiently large such that%
\begin{align}
& \Pr\left( \sup_{g\in\mathcal{G}_{n}(M)}\mu_{n}\left[ \psi(Z_{2}%
,g,\widehat{h}_{n})-\psi(Z_{2},g,h_{o})\right] \geq K_{2}\delta_{n}%
^{2},\widehat{h}_{n}\in\mathcal{N}_{1,K_{1}}\right) \nonumber\\
& \leq\Pr\left( \sup_{g\in\mathcal{N}_{2,K_{M}^{\ast}},h\in\mathcal{N}%
_{1,K_{1}}}\left\vert \mu_{n}\left[ \psi(Z_{2},g,h)-\psi(Z_{2},g,h_{o}%
)\right] \right\vert \geq K_{2}\delta_{n}^{2}\right) \leq\omega.
\label{P-RT-2d}%
\end{align}
Combining (\ref{P-RT-2b}), (\ref{P-RT-2c}), and (\ref{P-RT-2d}), we obtain%
\begin{equation}
I_{M,n}\left( \omega\right) \leq\Pr\left[ \left(
\begin{array}
[c]{c}%
\mu_{n}\left[ \psi\left( Z_{2},g_{n},h_{o}\right) -\psi(Z_{2}%
,g_{n},\widehat{h}_{n})\right] \\
+Q(g_{o},\widehat{h}_{n})-Q(g_{n},\widehat{h}_{n})\\
+\sup_{g\in\mathcal{G}_{n}(M)}\left[ I_{1,n}\left( g,h_{o}\right)
+I_{2,n}(g,\widehat{h}_{n})\right] \\
+K_{1}\varepsilon_{2,n}^{2}+K_{2}\delta_{n}^{2}%
\end{array}
\right) \geq0,\widehat{h}_{n}\in\mathcal{N}_{1,K_{1}}\right] +4\omega.
\label{P-RT-2b1}%
\end{equation}
By the definition of $\mathcal{N}_{2,K_{M}^{\ast}}$, we have $g_{n}%
\in\mathcal{N}_{2,K_{M}^{\ast}}$, which together with Assumption
\ref{R1}.(iii) implies that
\begin{align}
& \Pr\left( \mu_{n}\left[ \psi(Z_{2},g_{n},h_{o})-\psi(Z_{2},g_{n}%
,\widehat{h}_{n})\right] \geq K_{2}\delta_{n}^{2},\widehat{h}_{n}%
\in\mathcal{N}_{1,K_{1}}\right) \nonumber\\
& \leq\Pr\left( \sup_{g\in\mathcal{N}_{2,K_{M}^{\ast}},h\in\mathcal{N}%
_{1,K_{1}}}\left\vert \mu_{n}\left[ \psi(Z_{2},g,h_{o})-\psi(Z_{2}%
,g,h)\right] \right\vert \geq K_{2}\delta_{n}^{2}\right) \leq\omega.
\label{P-RT-2e}%
\end{align}
By the same argument that led to (\ref{P-RT-2b1}), we obtain%
\begin{equation}
I_{M,n}\left( \omega\right) \leq\Pr\left[ \left(
\begin{array}
[c]{c}%
Q(g_{o},\widehat{h}_{n})-Q(g_{n},\widehat{h}_{n})\\
+\sup_{g\in\mathcal{G}_{n}(M)}\left[ I_{1,n}\left( g,h_{o}\right)
+I_{2,n}(g,\widehat{h}_{n})\right] \\
+K_{1}\varepsilon_{2,n}^{2}+2K_{2}\delta_{n}^{2}%
\end{array}
\right) \geq0,\widehat{h}_{n}\in\mathcal{N}_{1,K_{1}}\right] +5\omega.
\label{P-RT-2b2}%
\end{equation}
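From Assumption \ref{R1}.(i), we can choose some constant $K_{3}$ sufficiently
large such that%
\[
\sup_{h\in\mathcal{N}_{1,K_{1}}}\left\vert \mathbb{E}\left[ \psi\left(
Z_{2},g_{o},h\right) -\psi\left( Z_{2},g_{n},h\right) \right] \right\vert
<K_{3}\delta_{2,n}^{2}%
\]
for all $n$ large enough. Combining this bound with (\ref{P-RT-2b2}) and using
$\max\{\varepsilon_{2,n},\delta_{n},\delta_{2,n}\}\leq\delta_{2,n}^{\ast}$, we
obtain (\ref{P-RT-2}) with $K=K_{1}+2K_{2}+K_{3}$.
% NOTE(review): the source text between this point and the choice of
% K_{\varepsilon} below (the remaining steps of the proof, including the
% displays labelled P-RT-3, P-RT-4, P-RT-4a and P-RT-4b) was lost when this
% file was extracted and must be restored from the original manuscript.

By $\left\Vert g_{n}-g_{o}\right\Vert _{\mathcal{G}}=O(\delta_{2,n}^{\ast})$,
we can choose some constant $K_{\varepsilon}>1$ large enough such that
$||g_{n}-g_{o}||_{\mathcal{G}}\leq
K_{\varepsilon}\delta_{2,n}^{\ast}$. Using Assumption \ref{R1}.(iv) and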
similar arguments in showing (\ref{P-RT-4b}), we deduce that%
\begin{align}
& \frac{\mathbb{E}\left[ \left\vert \mu_{n}\left[ \psi(Z_{2},g_{n}%
,h_{o})-\psi(Z_{2},g_{o},h_{o})\right] \right\vert \right] }{\left\vert
c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}\right\vert \delta_{2,n}^{\ast2}%
}\nonumber\\
& \leq\frac{\mathbb{E}\left[ \sup\limits_{\left\{ g\in
\mathcal{G}_{n}:||g-g_{o}||_{\mathcal{G}}\leq K_{\varepsilon}\delta
_{2,n}^{\ast}\right\} }\left\vert \mu_{n}\left[ \psi(Z_{2},g,h_{o})-\psi(Z_{2}%
,g_{o},h_{o})\right] \right\vert \right] }{\left\vert c_{K_{1},2}%
2^{2j}-K-c_{K_{1},1}2^{j}\right\vert \delta_{2,n}^{\ast2}}\nonumber\\
& \leq\frac{c_{1}(K_{\varepsilon}\delta_{2,n}^{\ast})^{\gamma}}{\sqrt
{n}\left\vert c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}\right\vert \delta
_{2,n}^{\ast2}}\frac{\phi_{n}(K_{\varepsilon}\delta_{2,n}^{\ast}%
)}{(K_{\varepsilon}\delta_{2,n}^{\ast})^{\gamma}}\nonumber\\
& \leq\frac{c_{1}K_{\varepsilon}^{\gamma}}{\left\vert c_{K_{1},2}%
2^{2j}-K-c_{K_{1},1}2^{j}\right\vert }\frac{\phi_{n}(\delta_{2,n}^{\ast}%
)}{\sqrt{n}\delta_{2,n}^{\ast2}}\leq\frac{c_{1}c_{2}K_{\varepsilon}^{\gamma}%
}{\left\vert c_{K_{1},2}2^{2j}-K-c_{K_{1},1}2^{j}\right\vert }.
\label{P-RT-4c}%
\end{align}
From (\ref{P-RT-4a}), (\ref{P-RT-4b}) and (\ref{P-RT-4c}), we get
(\ref{P-RT-4}).
\end{proof}
Theorem \ref{T-rate} indicates that the convergence rate of the second-step
sieve M estimator is determined by the convergence rate $\max\{ \delta
_{1,n},\delta_{n}\}$ of the estimation error introduced by the first-step
sieve estimation, the rate $\delta_{2,n}$ of the sieve approximation error of
$g_{o}$, the convergence rate $\varepsilon_{2,n}$ of the optimization error
and the measure $\delta_{g,n}$ of the complexity of the sieve space
$\mathcal{G}_{n}$.
Let $\Psi_{n,\delta}\equiv\left\{ \psi(Z_{2},g,h_{o})-\psi(Z_{2},g_{o}%
,h_{o}):\text{ }\left\Vert g-g_{o}\right\Vert _{\mathcal{G}}\leq\delta
,g\in\mathcal{N}_{2,K}\right\} $ and let $H_{[]}\left( u,\Psi_{n,\delta
},\left\Vert \cdot\right\Vert _{2}\right) $ denote the bracketing entropy of the
function class $\Psi_{n,\delta}$ with respect to the $L_{2}(dF_{Z})$-norm
$\left\Vert \cdot\right\Vert _{2}$. Define
\[
J_{[]}\left( \delta,\Psi_{n,\delta},\left\Vert \cdot\right\Vert _{2}\right)
=\int_{0}^{\delta}\sqrt{H_{[]}\left( u,\Psi_{n,\delta},\left\Vert \cdot\right\Vert
_{2}\right) }du.
\]
Assumption \ref{R1}.(iii) and (iv) can be replaced by the following low level conditions.
\begin{assumption}
\label{R1'} (i) The data are i.i.d.; (ii)
\[
\sup_{\left\{ g\in\mathcal{N}_{2,K}:\text{ }\left\Vert g-g_{o}\right\Vert
_{\mathcal{G}}\leq\delta\right\} }\mathbb{E}\left[ \left\vert \psi
(Z,g,h_{o})-\psi(Z,g_{o},h_{o})\right\vert ^{2}\right] \leq c\delta^{2};
\]
(iii) for any small $\delta>0$, there exists a constant $s_{1}\in(0,2)$ such
that
\[
\sup_{\left\{ g\in\mathcal{N}_{2,K}:\text{ }\left\Vert g-g_{o}\right\Vert
_{\mathcal{G}}\leq\delta\right\} }\left\vert \psi(Z,g,h_{o})-\psi
(Z,g_{o},h_{o})\right\vert \leq\delta^{s_{1}}U(Z)
\]
where $\mathbb{E}\left[ \left\vert U(Z)\right\vert ^{s_{2}}\right] \leq c$
for some $s_{2}\geq2$; (iv) there is a sequence of positive numbers
$\delta_{g,n}$ such that%
\[
\delta_{g,n}=\inf\left\{ \delta\in(0,1):\frac{J_{[]}\left( \delta
,\Psi_{n,\delta},\left\Vert \cdot\right\Vert _{2}\right) }{\sqrt{n}\delta
^{2}}\leq c\right\} ,
\]
where $\delta^{-\gamma}J_{[]}\left( \delta,\Psi_{n,\delta},\left\Vert
\cdot\right\Vert _{2}\right) $ is a decreasing function for some $\gamma
\in(0,2)$.
\end{assumption}
Assumption \ref{R1'}.(i), (ii) and (iii) are taken directly from the sufficient
conditions of Theorem 3.2 in Chen (2007), which establishes the convergence
rate of one-step sieve M estimation with \emph{i.i.d.} or m-dependent data.
The low level conditions in Assumption \ref{R1'} are easy to verify in
practice. However, the advantage of the high level assumption (\ref{AR1-4}) is
that it integrates the data structure and the metric entropy restriction into
one simple stochastic equicontinuity condition. As a result, the convergence
rate of the second-step sieve M estimator derived in this paper applies to the
general scenario with time series observations.
\begin{corollary}
\label{C-rate} Suppose that the conditions in Theorem \ref{CST}, Assumption
\ref{R1}.(i) and (ii), and Assumption \ref{R1'} are satisfied. Furthermore, if $\left\Vert
g_{n}-g_{o}\right\Vert _{\mathcal{G}}=O(\delta_{2,n}^{\ast})$, then we have
$\left\Vert \widehat{g}_{n}-g_{o}\right\Vert _{\mathcal{G}}=O_{p}(\delta
_{2,n}^{\ast})$, where $\delta_{2,n}^{\ast}$ is defined in Theorem
\ref{T-rate}.
\end{corollary}
\begin{proof}
[Proof of Corollary \ref{C-rate}]By\ Assumption \ref{R1'}.(iii), we know that
for any small number $\omega>0$, there exists a sufficiently large constant
$M_{n}$ such that%
\begin{align*}
\Pr(\left\vert U(Z_{i})\right\vert & >M_{n}\text{ for some }i\leq n)\leq
\sum_{i=1}^{n}\Pr(\left\vert U(Z_{i})\right\vert >M_{n})\\
& \leq\sum_{i=1}^{n}\frac{\mathbb{E}\left[ \left\vert U(Z)\right\vert
^{s_{2}}\right] }{M_{n}^{s_{2}}}\leq cnM_{n}^{-s_{2}}\leq\omega,
\end{align*}
where the first inequality is by the Bonferroni inequality, and the second
inequality is by the Markov inequality.
Now, conditioning on the event $\{ \left\vert U(Z_{i})\right\vert \leq M_{n}$
for all $i\leq n\}$ and using Assumption \ref{R1'}.(iii), we have
\[
\left\vert \psi(Z_{i},g,h_{o})-\psi(Z_{i},g_{o},h_{o})\right\vert \leq
\delta^{s_{1}}M_{n}%
\]
for all $i\leq n$ and for any $\psi(Z,g,h_{o})-\psi(Z,g_{o},h_{o})\in
\Psi_{n,\delta}$, which together with Assumption \ref{R1'}.(i) and (ii),
enables us to invoke Lemma 19.36 in Van der Vaart (1998) to get
\begin{align*}
& \mathbb{E}\left[ \sup_{\left\{ g\in\mathcal{G}_{n}:\text{ }\left\Vert
g-g_{o}\right\Vert _{\mathcal{G}}\leq\delta\right\} }\left\vert \mu
_{n}\left[ \psi(Z,g,h_{o})-\psi(Z,g_{o},h_{o})\right] \right\vert \right] \\
& \leq\frac{cJ_{[]}\left( \delta,\Psi_{n,\delta},\left\Vert \cdot\right\Vert
_{2}\right) }{\sqrt{n}}\left( 1+\frac{J_{[]}\left( \delta,\Psi_{n,\delta
},\left\Vert \cdot\right\Vert _{2}\right) }{\sqrt{n}\delta^{2}}M_{n}\right)
\equiv\frac{\phi_{n}(\delta)}{\sqrt{n}}.
\end{align*}
By Assumption \ref{R1'}.(iv), we know that the above function $\phi_{n}%
(\delta)$ satisfies the requirement (\ref{DR1}) in Theorem \ref{T-rate}. The
rest of the proof is the same as that of Theorem \ref{T-rate} and hence is omitted.
\end{proof}
\bigskip
\begin{thebibliography}{99} %
\bibitem {}Belloni, A., V. Chernozhukov, D. Chetverikov, and K. Kato (2015):
\textquotedblleft Some New Asymptotic Theory for Least Squares Series:
Pointwise and Uniform Results,\textquotedblright\ Journal of Econometrics,
186, 345--366.
\bibitem {}Belloni, A., V. Chernozhukov, D. Chetverikov, and I.
Fern\'{a}ndez-Val (2016): \textquotedblleft Conditional Quantile Processes Based on Series or
Many Regressors,\textquotedblright\ Working Paper, Department of Economics, UCLA.
\bibitem {}Chen, X. (2007): \textquotedblleft Large Sample Sieve Estimation of
Semi-Nonparametric Models,\textquotedblright\ In: James J. Heckman and Edward
E. Leamer, Editor(s), \emph{Handbook of Econometrics}, 6B, Pages 5549-5632.
\bibitem {}Chen, X. and X. Shen (1998): \textquotedblleft Sieve Extremum
Estimates for Weakly Dependent Data,\textquotedblright\ \emph{Econometrica},
66, 289-314.
\bibitem {}Chen, X., and D. Pouzo (2012): \textquotedblleft Estimation of
Nonparametric Conditional Moment Models with Possibly Nonsmooth Generalized
Residuals,\textquotedblright\ \emph{Econometrica} 80, 277-321.
\bibitem {}Shen, X. and W.H. Wong (1994): \textquotedblleft Convergence Rate
of Sieve Estimates,\textquotedblright\ \emph{Annals of Statistics}, 22(2), 580--615.
\bibitem {}Van der Vaart, A. and J. Wellner (1996): \emph{Weak Convergence and
Empirical Processes: with Applications to Statistics}, New York: Springer-Verlag.
\bibitem {}Van der Vaart, A. (1998): \emph{Asymptotic Statistics}, Cambridge:
Cambridge University Press.
\bibitem {}White, H. and J. Wooldridge (1991): \textquotedblleft Some Results
on Sieve Estimation with Dependent Observations\textquotedblright,\ in
Barnett, W.A., J. Powell and G. Tauchen (eds.), \emph{Non-parametric and
Semi-parametric Methods in Econometrics and Statistics}, 459-493, Cambridge:
Cambridge University Press.
\bibitem {}Wooldridge, J.M. (2002): \emph{Econometric Analysis of Cross
Section and Panel Data}, Cambridge: MIT Press.
\end{thebibliography}
\end{document}