\documentclass[11pt, letterpaper]{article}
\usepackage{times}
%\usepackage{harvard}
\usepackage{graphicx}
\usepackage{amssymb}
\usepackage{amsmath}
\usepackage{layout,rotating}
\usepackage{longtable,lscape}
\usepackage{amsthm}
%\usepackage{newcent}
\usepackage{fullpage}
\usepackage[usenames]{color}
%\usepackage[active]{srcltx} % SRC Specials for DVI Searching
\usepackage[T1]{fontenc}
\usepackage{multirow}
\usepackage{wasysym}
\usepackage[round]{natbib}
\usepackage[utf8]{inputenc}
\usepackage{verbatim}
\usepackage{arydshln}
\usepackage{amsfonts}
\usepackage{times}
\usepackage[dvips]{geometry}
\usepackage{setspace}
\usepackage{layout,rotating}
\usepackage[normalem]{ulem}
%\usepackage{showframe}
\usepackage{xr}
\externaldocument{JSCHO_ParDiff_04-30-2017}

\setcounter{MaxMatrixCols}{10}
\setlength{\paperwidth}{8.5in} \setlength{\paperheight}{11in}
\setlength{\textwidth}{6.5in} \setlength{\oddsidemargin}{0.0in}
\setlength{\textheight}{9.0in}
\setlength{\evensidemargin}{0.0in}\setlength{\marginparwidth}{0pt}
\setlength{\marginparpush}{0in} \setlength{\marginparsep}{0in}
\setlength{\headsep}{0pt} \setlength{\footskip}{0.5in}
\setlength{\footnotesep}{10pt} \setlength{\topmargin}{0.0in}
\setlength{\headheight}{0.in}
\def\baselinestretch{1.5}

\newtheorem{theorem}{Theorem}
\newtheorem{definition}{Definition}
\newtheorem{lemma}{Lemma}
\newtheorem{corollary}{Corollary}
\newtheorem{assumption}{Assumption}
\newtheorem{remarks}{Remarks}
\newcommand{\1}{\mathbf{1}}
\newcommand{\tr}{\mathrm{tr}}
\newcommand{\cl}{\mathrm{cl}}
\newcommand{\acov}{\mathrm{acov}}
\newcommand{\cov}{\mathrm{cov}}
\newcommand{\corr}{\mathrm{corr}}
\newcommand{\diag}{\mathrm{diag}}
\newcommand{\avar}{\mathrm{avar}}
\newcommand{\var}{\mathrm{var}}
\newcommand{\cF}{\mathcal{F}}
\newcommand{\cG}{\mathcal{G}}
\newcommand{\cH}{\mathcal{H}}
\newcommand{\cW}{\mathcal{W}}
\newcommand{\cX}{\mathcal{X}}
\newcommand{\cZ}{\mathcal{Z}}
\newcommand{\cLR}{\mathcal{LR}}
\newcommand{\cLM}{\mathcal{LM}}
\newcommand{\cQLM}{\mathcal{QLM}}
\newcommand{\cQLR}{\mathcal{QLR}}
\newcommand{\cQW}{\mathcal{QW}}
\newcommand{\cY}{\mathcal{Y}}
\newcommand{\bcZ}{\boldsymbol{\mathcal{Z}}}
\newcommand{\bcG}{\boldsymbol{\mathcal{G}}}
\newcommand{\bA}{\mathbf{A}}
\newcommand{\bB}{\mathbf{B}}
\newcommand{\bC}{\mathbf{C}}
\newcommand{\bD}{\mathbf{D}}
\newcommand{\bG}{\mathbf{G}}
\newcommand{\bH}{\mathbf{H}}
\newcommand{\bI}{\mathbf{I}}
\newcommand{\bM}{\mathbf{M}}
\newcommand{\bO}{\mathbf{O}}
\newcommand{\bQ}{\mathbf{Q}}
\newcommand{\bR}{\mathbf{R}}
\newcommand{\bS}{\mathbf{S}}
\newcommand{\bT}{\mathbf{T}}
\newcommand{\bU}{\mathbf{U}}
\newcommand{\bW}{\mathbf{W}}
\newcommand{\bX}{\mathbf{X}}
\newcommand{\bY}{\mathbf{Y}}
\newcommand{\bZ}{\mathbf{Z}}
\newcommand{\bg}{\mathbf{g}}
\newcommand{\bq}{\mathbf{q}}
\newcommand{\bx}{\mathbf{x}}
\newcommand{\bzero}{\mathbf{0}}
\newcommand{\bh}{\boldsymbol{h}}
\newcommand{\bv}{\boldsymbol{v}}
\newcommand{\bd}{\boldsymbol{d}}
\newcommand{\bs}{\boldsymbol{s}}
\newcommand{\btheta}{\boldsymbol{\theta}}
\newcommand{\bgamma}{\boldsymbol{\gamma}}
\newcommand{\biota}{\boldsymbol{\iota}}
\newcommand{\bbeta}{\boldsymbol{\beta}}
\newcommand{\beeta}{\boldsymbol{\eta}}
\newcommand{\balpha}{\boldsymbol{\alpha}}
\newcommand{\bxi}{\boldsymbol{\xi}}
\newcommand{\bdelta}{\boldsymbol{\delta}}
\newcommand{\bphi}{\boldsymbol{\phi}}
\newcommand{\bpsi}{\boldsymbol{\psi}}
\newcommand{\bpi}{\boldsymbol{\pi}}
\newcommand{\btau}{\boldsymbol{\tau}}
\newcommand{\blambda}{\boldsymbol{\lambda}}
\newcommand{\bupsilon}{\boldsymbol{\upsilon}}
\newcommand{\bmu}{\boldsymbol{\mu}}
\newcommand{\bomega}{\boldsymbol{\omega}}
\newcommand{\bPsi}{\boldsymbol{\Psi}}
\newcommand{\bLambda}{\boldsymbol{\Lambda}}
\newcommand{\bUpsilon}{\boldsymbol{\Upsilon}}
\newcommand{\bGamma}{\boldsymbol{\Gamma}}
\newcommand{\bOmega}{\boldsymbol{\Omega}}
\newcommand{\bveps}{\boldsymbol{\varepsilon}}
\newcommand{\bTheta}{\boldsymbol{\Theta}}
\newcommand{\bXi}{\boldsymbol{\Xi}}
\newcommand{\bPi}{\boldsymbol{\Pi}}
\newcommand{\plim}{\textrm{plim}}
\newcommand{\asim}{\stackrel{\textrm{A}}{\sim}}
\newcommand{\pto}{\stackrel{\mathbb{P}}{\to}}
\newcommand{\ato}{\stackrel{\textrm{a.s.}}{\to}}
\newcommand{\dequal}{\stackrel{\textrm{d}}{=}}
\newcommand{\bbP}{\mathbb{P}}
\newcommand{\wtilde}{\widetilde}
\newcommand{\fBox}{\hspace*{\fill}\blacksquare}
\newcommand{\fWox}{\hspace*{\fill}\square}
\newcommand\red[2][red]{{\color{#1}#2}}
\newcommand\xqed[1]{\leavevmode\unskip\penalty9999 \hbox{}\nobreak\hfill\quad\hbox{#1}}
\newtheorem{examplex}{Example}
\newenvironment{example}
  {\pushQED{\qed}\renewcommand{\qedsymbol}{$\square$}\examplex}
  {\popQED\endexamplex}
\newcommand\wbox{\xqed{$\square$}}
\newcommand\bbox{\xqed{$\blacksquare$}}

\begin{document}
%\layout
\thispagestyle{plain} \pagenumbering{roman}\setcounter{page}{0}

\title{Supplements to ``Directionally Differentiable Econometric Models''}

\author{
{\small \textsc{JIN SEO CHO}}\\
{\small School of Economics}\\
{\small Yonsei University}\\
{\small 50 Yonsei-ro, Seodaemun-gu, Seoul, 03722, Korea}\\
{\small Email: jinseocho@yonsei.ac.kr}\and
{\small \textsc{HALBERT WHITE}}\\
{\small Department of Economics}\\
{\small University of California, San Diego}\\
{\small 9500 Gilman Dr., La Jolla, CA, 92093-0508, U.S.A.}\\
{\small Email: hwhite@weber.ucsd.edu}}

\date{{\small This version: July, 2017}}

\maketitle

\begin{abstract}
\baselineskip=13pt%
We illustrate analyzing directionally differentiable econometric models and provide technical details which are not included in Cho and White (2017).

\bigskip\noindent{Key Words:} directionally differentiable quasi-likelihood function, Gaussian stochastic process, quasi-likelihood ratio test, Wald test, and Lagrange multiplier test statistics, stochastic frontier production function, GMM estimation, Box-Cox transform.

\bigskip\noindent\textbf{JEL Classification:}  C12, C13, C22, C32.


\bigskip\noindent\textbf{Acknowledgements:} The co-editor, Yoon-Jae Whang, and two anonymous referees provided very helpful comments for which we are most grateful. The second author (Halbert White) passed away while the submission version was written. He formed the outline of the paper and also provided an exemplary guide for writing a quality research paper. The authors benefited from discussions with Yoichi Arai, Seung Chan Ahn, In Choi, Horag Choi, Robert Davies, Graham Elliott, Chirok Han, John Hillas, Jung Hur, Hide Ichimura, Isao Ishida, Yongho Jeon, Estate Khmaladze, Chang-Jin Kim, Chang Sik Kim, Tae-Hwan Kim, Naoto Kunitomo, Hon Ho Kwok, Taesuk Lee, Bruce Lehmann, Mark Machina, Jaesun Noh, Kosuke Oya, Taeyoung Park, Peter Phillips, Erwann Sbai, Juwon Seo, Donggyu Sul, Denis Tkachenko, Albert K.C. Tsui, Hung-Jen Wang, Yoshihiro Yajima, Byung Sam Yoo, Ping Yu, and other seminar participants at Sogang University, the University of Auckland, the University of Hong Kong, the University of Tokyo, Osaka University, the Econometrics Study Group of the Korean Econometric Society, VUW, Yonsei University, and other conference participants at NZESG (Auckland, 2013). Cho acknowledges support from the Yonsei University Future-leading Research Initiative of 2017 (2017-22-0090).
\end{abstract}

%\begin{small}

%\end{small}

\pagebreak\pagenumbering{arabic}\setcounter{page}{1}

\setcounter{theorem}{0} \setcounter{section}{0}
\renewcommand{\thetheorem}{\Alph{section}\arabic{theorem}}

\section{Introduction}

This Supplement illustrates the analysis of econometric models formed by
directionally differentiable (D-D) quasi-likelihood functions and
provides technical details which are not included in Cho and White
(\citeyear{Cho2017}). All theorems, assumptions, and corollaries are
those in Cho and White (\citeyear{Cho2017}) unless otherwise stated.

\section{Examples}

In this section, we illustrate the analysis of D-D econometric models using the stochastic frontier production function in Aigner, Lovell, and Schmidt (\citeyear{Aigner1977}) and Stevenson (\citeyear{Stevenson1980}); Box and Cox's (\citeyear{Box1964}) transformation; and the standard generalized methods of moments (GMM) estimation in Hansen (\citeyear{Hansen1982}).

\subsection{Example 1: Stochastic Frontier Production Function Models}

A D-D quasi-likelihood function is found from the theory of stochastic frontier production function models. Stochastic production function models are often specified for identically and independently distributed (IID) observations $\{Y_{t}, \bX_{t}\}$ as
\begin{equation*}
Y_t = \bX_{t}' \bbeta_* + U_t,
\end{equation*}
where $Y_t \in \mathbb{R}$ is the output produced by inputs $\bX_t \in \mathbb{R}^{k}$ such that $\bbeta_*$ is an interior element of $\bB \subset \mathbb{R}^{k}$, $E[U_{t}^{2}] < \infty$, $E[X_{t,j}^{2}] < \infty$ for $j=1, 2, \ldots, k$, and $E[\bX_{t}\bX_{t}']$ is positive definite. Here, $U_t$ stands for an error which is independent of $\bX_{t}$. This model was first introduced by Aigner, Lovell, and Schmidt (\citeyear{Aigner1977}).

One of the early uses of this specification is in identifying inefficiently produced outputs. Given output levels subject to the production function and inputs, outputs are inefficiently produced if $E[U_{t}] < 0$. Aigner, Lovell, and Schmidt (\citeyear{Aigner1977}) captured this inefficiency by decomposing $U_{t}$ into $U_{t} \equiv V_{t} - W_{t}$, where $V_{t} \sim N(0, \tau_{*}^{2})$, $W_{t} := \max[0, Q_{t}]$, $Q_{t} \sim N(\mu_{*}, \sigma_{*}^{2})$, and $V_{t}$ is independent of $W_{t}$. Here, it is assumed that $\tau_{*} > 0$, $\sigma_{*} \geq 0$, and $\mu_{*} \geq 0$, and $W_{t}$ is employed to capture inefficiently produced outputs. If $\mu_{*} = 0$ and $\sigma_{*}^{2} = 0$, this model reduces to Zellner, Kmenta, and Dr\`{e}ze's (\citeyear{Zellner1966}) stochastic production function model, producing outputs efficiently. The key to the identification of inefficiency is, therefore, in testing whether $\mu_{*} = 0$ and $\sigma_{*}^{2} = 0$.

The original model introduced by Aigner, Lovell, and Schmidt (\citeyear{Aigner1977}) assumes $\mu_{*} = 0$, so that the mode of $W_{t}$ is always achieved at zero. Stevenson (\citeyear{Stevenson1980}) suggested to extend the model scope by letting $\mu_{*}$ be different from zero, and the model with unknown $\mu_{*}$ has been popularly specified in empirical works since then (e.g., Dutta, Narasimhan, and Rajiv (\citeyear{Dutta1999}), Habib and Ljungqvist (\citeyear{Habib2005})).

Nevertheless, it is hard to find a proper methodology which tests $\mu_{*} = 0$ and $\sigma_{*}^{2} = 0$ in prior literature to the best of our knowledge. This is mainly because the likelihood value is not identified under the null. Note that for each $(\bbeta, \sigma,\, \mu, \tau)$, the log-likelihood is given as
\begin{equation*}
L_{n}(\bbeta, \sigma,\, \mu, \tau) = \sum_{t=1}^{n} \left\{\ln\left[ \phi\left( \frac{Y_{t} - \bX_{t}' \bbeta + \mu}{\sqrt{\sigma^{2} + \tau^{2}}} \right)\right] -\frac{1}{2} \ln(\sigma^{2} + \tau^{2}) -  \ln\left[\Phi\left(\frac{\mu}{\sqrt{\sigma^{2}}}\right)/ \Phi\left( \frac{\widetilde{\mu}_{t}}{\sqrt{\widetilde{\sigma}^{2}}}\right)\right]  \right\},
\end{equation*}
where $\phi(\,\cdot\,)$ and $\Phi(\,\cdot\,)$ are the probability density function (PDF) and cumulative density function (CDF) of a standard normal random variable, respectively, and
\begin{equation*}
\widetilde{\mu}_{t} := \frac{\tau^{2} \mu - \sigma^{2} (Y_{t} - \bX_{t}' \bbeta )}{\tau^{2} + \sigma^{2}}\;\;\;\;\;\text{and}\;\;\;\;\; \widetilde{\sigma}^{2} := \frac{\tau^{2} \sigma^{2} }{\tau^{2} + \sigma^{2}}.
\end{equation*}
Here, the log-likelihood is not identified if $\btheta_{*} := (\bbeta_{*}', \mu_{*}, \sigma_{*}, \tau_{*})' = (\bbeta_{*}', 0, 0, \tau_{*})$ because $\mu_{*} / \sqrt{\sigma_{*}^{2}} = 0/0$, so that $\ln[\Phi(\mu_{*}/\sqrt{\sigma_{*}^{2}})]$ is not properly identified. Furthermore, if we let
\begin{equation*}
\widetilde{\mu}_{*t} := \frac{\tau_{*}^{2} \mu_{*} - \sigma_{*}^{2} U_{t} }{\tau_{*}^{2} + \sigma_{*}^{2}}\;\;\;\;\;\text{and}\;\;\;\;\; \widetilde{\sigma}_{*}^{2} := \frac{\tau_{*}^{2} \sigma_{*}^{2} }{\tau_{*}^{2} + \sigma_{*}^{2}},
\end{equation*}
$\widetilde{\mu}_{*t} / \sqrt{\widetilde{\sigma}_{*}^{2}} = 0/0$, so that $\ln[\Phi(\widetilde{\mu}_{*t}/\sqrt{\widetilde{\sigma}_{*}^{2}})]$ is not identified by the model.

Even further, this model is not differentiable (D). This aspect is verified by examining the first-order directional derivative of the model. Some tedious algebra shows that for a given $\bd := (\bd_{\bbeta}', d_{\mu}, d_{\sigma}, d_{\tau})'$,
\begin{equation*}
\lim_{h \downarrow 0} L_{n}(\btheta_{*} + h \bd) = -\frac{n}{2} \ln(\tau_{*}^{2}) + \sum_{t=1}^{n} \ln\left[ \phi\left( \frac{Y_{t} - \bX_{t}' \bbeta_{*} }{\sqrt{\tau_{*}^{2}}} \right)\right],
\end{equation*}
which is the log-likelihood desired by the null condition. This limit is obtained by particularly using the fact that
\begin{equation*}
\lim_{h \downarrow 0} \Phi\left( \frac{h d_{\mu}}{\sqrt{(h d_{\sigma})^{2}}} \right) = \Phi\left( \frac{d_{\mu}}{\sqrt{d_{\sigma}^{2}}} \right) \;\;\;\text{and}\;\;\; \lim_{h \downarrow 0} \Phi\left(\frac{\widetilde{\mu}_{*t}(h;\bd)}{\sqrt{\widetilde{\sigma}(h;\bd)^{2}}}\right) = \Phi\left( \frac{ d_{\mu}}{\sqrt{d_{\sigma}^{2}}} \right),
\end{equation*}
where
\begin{equation*}
\widetilde{\sigma}_{*}(h;\bd)^{2} := \frac{(\tau_{*} + h d_{\tau})^{2} (h d_{\sigma})^{2} }{(\tau_{*} + h d_{\tau})^{2} + (h d_{\sigma})^{2}}\;\;\;\text{and}
\end{equation*}
\begin{equation*}
\widetilde{\mu}_{*t}(h;\bd) := \frac{(\tau_{*} + h d_{\tau})^{2} h d_{\mu} - (h d_{\sigma})^{2} (Y_{t} - \bX_{t}'(\bbeta_* + h \bd_{\bbeta} )) }{(\tau_{*} + h d_{\tau})^{2} + (h d_{\sigma})^{2}}.
\end{equation*}
Using this directional limit, the first- and second-order directional derivatives of $L_{n}(\,\cdot\,)$ at $(\bbeta_{*}, 0, 0, \tau_{*})$ are
\begin{equation*}
DL_{n}(\btheta_{*}; \bd) = \sum_{t=1}^{n} \frac{1}{\tau_{*}^{3}}\left\{ d_{\tau}(U_{t}^{2} - \tau_{*}^{2}) +  \left[ -d_{\mu} + \bX_{t}' \bd_{\bbeta} - \psi\left(d_{\mu}, d_{\sigma} \right) \right] \tau_{*} U_{t} \right\},
\end{equation*}
and
\begin{align*}
D^{2}L_{n}(\btheta_{*}; \bd) = &\sum_{t=1}^{n} \frac{1}{\tau_{*}^{4}}\left\{ d_{\sigma}^{2} (U_{t}^{2} - \tau_{*}^{2}) +  d_{\tau}^{2} \tau_{*}^{2} - d_{\tau}U_{t} -(d_{\mu}- \bX_{t}' \bd_{\bbeta}) \tau_{*}] [3 d_{\tau}U_{t}  -(d_{\mu}- \bX_{t}' \bd_{\bbeta}) \tau_{*}] \right\} \notag \\
&\;\;\;\;- \sum_{t=1}^{n}\frac{1}{ \tau_{*}^{4}} \left\{\psi(d_{\mu}, d_{\sigma} )^{2} U_{t}^{2} + \psi(d_{\mu}, d_{\sigma}) [d_{\mu}U_{t}^{2} - 4 d_{\tau} \tau_{*} U_{t} + (d_{\mu} - 2 \bX_{t}'\bd_{\bbeta})\tau_{*}^{2} ]\right\},
\end{align*}
respectively, where $\psi(d_{\mu}, d_{\sigma} ):= \vert d_{\sigma} \vert {\phi(d_{\mu}/\vert d_{\sigma} \vert)}/{\Phi(d_{\mu}/\vert d_{\sigma} \vert)}$. Here, if $\btheta_{*} = (\bbeta_{*}', 0, 0, \tau_{*})$, it follows that $U_{t} \sim N(0, \tau_{*}^{2})$. These directional derivatives are neither linear nor quadratic with respect to $\bd$, respectively, so that $L_{n}(\cdot)$ is not twice D. Therefore, this model cannot be analyzed as for the standard D model. We examine this model by letting
\begin{equation*}
\Delta(\btheta_{*}) := \left\{ \bd \in \mathbb{R}^{d+3}: \bd'\bd = 1, d_{\mu} \geq 0, \;\text{and}\; d_{\sigma} \geq 0 \right\}
\end{equation*}
to accommodate the condition that $\mu_{*} \geq 0$ and $\sigma_{*} \geq 0$.

It is not hard to identify the asymptotic behaviors of the first- and second-order directional derivatives. Note that $D L_{n}(\btheta_{*}; \bd) = Z_{1,n}(\bd) + Z_{2,n}(\bd)$, where for each $\bd$,
\begin{equation*}
Z_{1,n}(\bd) := \frac{d_{\tau}}{\tau_{*}^{3}}\sum_{t=1}^{n} (U_{t}^{2}- \tau_{*}^{2}),\;\;\;\; Z_{2,n}(\bd):= \frac{1}{\tau_{*}^{2}} \sum_{t=1}^{n} \left[ \bX_{t}' \bd_{\bbeta} + m\left(d_{\mu}, d_{\sigma} \right) \right] U_{t},
\end{equation*}
and $m(d_{\mu}, d_{\sigma}) := -[ d_{\mu} + \psi\left(d_{\mu}, d_{\sigma} \right)]$. Here, $\psi\left(\cdot, \cdot \right)$ is Lipschitz continuous, so that Assumption \ref{assm:5}(\emph{iii}) holds with respect to the first-order directional derivative. Furthermore, for each $\bd$, McLeish's (\citeyear{McLeish1974}, theorem 2.3) central limit theorem (CLT) can be applied to $Z_{1,n}(\bd)$ and $Z_{2,n}(\bd)$: for each $\bd$,
\begin{equation*}
n^{-1/2} \left[\begin{array}{c} Z_{n,1}(\bd) \\ Z_{n,2}(\bd) \end{array} \right] \Rightarrow \left[\begin{array}{c} \cZ_{1}(\bd) \\ \cZ_{2}(\bd) \end{array} \right] \sim N\left(\left[\begin{array}{c} 0 \\ 0 \end{array} \right], \frac{1}{\tau_{*}^{2}} \left[\begin{array}{cc} 2 d_{\tau}^{2} & 0 \\ 0 & E[( \bX_{t}' \bd_{\bbeta} + m\left(d_{\mu}, d_{\sigma} \right))^{2} ] \end{array} \right] \right).
\end{equation*}
It follows that for each $\bd$ and $\widetilde{\bd}$,
\begin{equation*}
E[\cZ_{1}(\bd)\cZ_{1}(\widetilde{\bd})] = 2 \frac{d_{\tau} \widetilde{d}_{\tau}}{\tau_{*}^{2}},\;\;\;\;\; E[\cZ_{1}(\bd)\cZ_{2}(\widetilde{\bd})] = 0,\;\;\;\;\text{and}
\end{equation*}
\begin{equation*}
E[\cZ_{2}(\bd)\cZ_{2}(\widetilde{\bd})] = \frac{1}{\tau_{*}^{2}} \left[\begin{array}{c} m\left(d_{\mu}, d_{\sigma} \right) \\ \bd_{\bbeta} \end{array} \right]' \left[ \begin{array}{cc} 1 & E[\bX_{t}']\\
E[\bX_{t}] & E[\bX_{t} \bX_{t}']  \end{array} \right] \left[\begin{array}{c} m(\widetilde{d}_{\mu}, \widetilde{d}_{\sigma} )\\ \widetilde{\bd}_{\bbeta} \end{array} \right].
\end{equation*}
Here, $Z_{n,1}(\bd)$ and $Z_{n,2}(\bd)$ are linear with respect $d_{\tau}$ and $[m\left(d_{\mu}, d_{\sigma} \right), \bd_{\bbeta}' ]'$, respectively. From this fact, their tightness trivially follows, so that $n^{-1/2} D L_{n}(\btheta_{*}; \,\cdot\,) \Rightarrow \cZ(\cdot)$, where $\cZ(\cdot)$ is a zero-mean Gaussian stochastic process such that for each $\bd$ and $\widetilde{\bd}$, $E[\cZ(\bd) \cZ(\widetilde{\bd})] = B_{*}(\bd, \widetilde{\bd})$ and
\begin{equation*}
B_{*}(\bd, \widetilde{\bd}) := \frac{1}{\tau_{*}^{2}} \left[\begin{array}{c} \bd_{\bbeta} \\ m\left(d_{\mu}, d_{\sigma} \right)\\ d_{\tau} \end{array} \right]' \left[ \begin{array}{ccc} E[\bX_{t} \bX_{t}'] & E[\bX_{t}] & 0\\ E[\bX_{t}'] & 1 & 0 \\ 0 & 0 & 2 \end{array} \right] \left[\begin{array}{c} \widetilde{\bd}_{\bbeta} \\ m(\widetilde{d}_{\mu}, \widetilde{d}_{\sigma} )\\ \widetilde{d}_{\tau} \end{array} \right].
\end{equation*}
Let $\cZ(\cdot)$ be defined as $\cZ_{1}(\cdot) + \cZ_{2}(\cdot)$.

We provide another Gaussian stochastic process with the same covariance structure as that of $\cZ(\cdot)$. If we let $\widetilde{\cZ}(\bd) := \bdelta(\bd)' \bOmega_{*}^{1/2} \bW$ such that for each $\bd$,
\begin{equation*}
\bdelta(\bd) := \left[\begin{array}{c} \bd_{\bbeta} \\ m\left(d_{\mu}, d_{\sigma} \right) \\ d_{\tau} \end{array} \right],\;\;\;\;\;\;\; \bOmega_{*} :=\frac{1}{\tau_{*}^{2}} \left[ \begin{array}{ccc} E[\bX_{t} \bX_{t}'] & E[\bX_{t}] & 0\\ E[\bX_{t}'] & 1 & 0\\ 0 & 0 & 2 \end{array} \right],
\end{equation*}
and $\bW \sim N(\bzero_{k+2}, \bI_{k+2})$, it follows that $E[\widetilde{\cZ}(\bd)\widetilde{\cZ}(\widetilde{\bd})] = \bdelta(\bd)' \bOmega_{*} \bdelta(\widetilde{\bd})$ which is identical to $B_{*}(\bd, \widetilde{\bd})$, so that $\widetilde{\cZ}(\cdot) \dequal \cZ(\cdot)$. Furthermore, $\widetilde{\cZ}(\cdot)$ is linear with respect to $\bW$. This feature makes it convenient to analyze the asymptotic distribution of the first-order directional derivative.

The probability limit of the second-order directional derivative is similarly obtained. Note that $D^{2}L_{n}(\btheta_{*};$ $\cdot)$ is Lipschitz continuous on $\Delta(\btheta_{*})$, so that Assumption \ref{assm:5}(\emph{iii}) holds, and we can apply the law of large numbers (LLN):
\begin{equation*}
\frac{1}{n}\sum_{t=1}^{n} U_{t}^{2} = \tau_{*} + o_{\bbP}(1),\;\;\; \frac{1}{n}\sum_{t=1}^{n} U_{t} \bX_{t} = o_{\bbP}(1),\;\;\;\text{and}\;\;\;
\frac{1}{n}\sum_{t=1}^{n} \bX_{t}\bX_{t}' = E[\bX_{t}\bX_{t}'] + o_{\bbP}(1).
\end{equation*}
This implies that
\begin{equation*}
n^{-1} D^{2}L_{n}(\btheta_{*}; \bd) \ato - \frac{1}{\tau_{*}^{2}} \left\{ 2 d_{\tau}^{2} + E[(d_{\mu} - \bX_{t}' \bd_{\bbeta})^{2}] + \psi(d_{\mu}, d_{\sigma})^{2} + 2 [d_{\mu} - E[\bX_{t}]'\bd_{\bbeta}] \psi(d_{\mu}, d_{\sigma}) \right\},
\end{equation*}
and this is identical to $- B(\bd, \bd)$. Thus, $2\{L_{n}(\widehat{\btheta}_{n}) - L_{n}(\btheta_{*}) \} \Rightarrow \sup_{\bd \in \Delta(\btheta_{*})} [0, \cY(\bd) ]^{2}$ by Theorem \ref{thm:1}(\emph{iii}), where
\begin{equation*}
\cY(\bd):= \frac{\bdelta(\bd)' \bOmega_{*}^{1/2} \bW}{\{\bdelta(\bd)' \bOmega_{*} \bdelta(\bd)\}^{1/2}},
\end{equation*}
and for each $\bd$ and $\widetilde{\bd}$,
\begin{equation*}
E[\cY(\bd) \cY(\widetilde{\bd})] = \frac{\bdelta(\bd)' \bOmega_{*}\bdelta(\widetilde{\bd})}{\{\bdelta(\bd)' \bOmega_{*}\bdelta(\bd)\}^{1/2}\{\bdelta(\widetilde{\bd})' \bOmega_{*}\bdelta(\widetilde{\bd})\}^{1/2}}.
\end{equation*}
As a result, the directional limit of the likelihood is well defined under the null, although the log-likelihood is not properly identified under the null.

We can test the hypothesis of efficient production using the QLR, Wald, and LM test statistics. For this examination, we let $\bupsilon = (\mu, \sigma)'$, $\blambda = \bbeta$, $\btau = \tau$, and $\bpi = (\bbeta', \bupsilon')' = (\bbeta', \mu, \sigma)'$ and follow the notation in Section 3 of Cho and White (\citeyear{Cho2017}). The hypotheses of interest here are
\begin{equation*}
H_{0}: \bupsilon_{*} = \bzero\;\;\; \text{versus}\;\;\; H_{1}: \bupsilon_{*} \neq \bzero.
\end{equation*}
Then, for each $\bd$ and $\widetilde{\bd}$,
\begin{equation*}
\bB_{*}(\bd,\widetilde{\bd}) = \left[\begin{array}{cc} \bB_{*}^{(\bpi,\bpi)}(\bd_{\bpi},\widetilde{\bd}_{\bpi}) & \bzero'\\ \bzero& \frac{2}{\tau_{*}^{2}}{{d_{\tau}}'}\widetilde{d}_{\tau}\;\; \end{array} \right],
\end{equation*}
and
\begin{equation*}
\bB_{*}^{(\bpi,\bpi)}(\bd_{\bpi},\widetilde{\bd}_{\bpi}) = \frac{1}{\tau_{*}^{2}} \left[\begin{array}{cc} \bd_{\bbeta}' E[\bX_{t} \bX_{t}'] \widetilde{\bd}_{\bbeta} & \bd_{\bbeta}' E[\bX_{t}'] m(d_{\mu}, d_{\sigma})\\
m(\widetilde{d}_{\mu}, \widetilde{d}_{\sigma}) E[\bX_{t}] \widetilde{\bd}_{\bbeta} & m(d_{\mu}, d_{\sigma}) m(\widetilde{d}_{\mu}, \widetilde{d}_{\sigma}) \end{array}\right].
\end{equation*}
By the information matrix equality, for each $\bd$, $\bB_{*}(\bd)$ is identical to $-\bA_{*}(\bd)$.

The null limit distributions of the test statistics are identified by the theorems in Cho and White (\citeyear{Cho2017}). First, we apply the QLR test statistic. Applying Theorem \ref{thm:2} shows that
\begin{equation*}
\cLR_{n}^{(1)} := 2\{L_{n}(\widehat{\btheta}_{n}) - L_{n}(\btheta_{*})\} \Rightarrow \sup_{\bs_{\bpi} \in \Delta(\bpi_{*})} \max [0, \cY^{(\bpi)}(\bs_{\bpi})]^{2} + \cH_{2},
\end{equation*}
where for each $\bs_{\bpi} \in \Delta(\bpi_{*}) :=\{(\bs_{\bbeta}', s_{\mu}, s_{\sigma})' \in \mathbb{R}^{k+2}: \bs_{\bbeta}'\bs_{\bbeta} + s_{\mu}^{2} + s_{\sigma}^{2} = 1, s_{\mu} > 0, \;\text{and}\; s_{\sigma} > 0 \}$,
\begin{equation*}
\cY^{(\bpi)}(\bs_{\bpi}):= \{ E[(\bs_{\bbeta}' \bX_{t} + m(s_{\mu}, s_{\sigma}))^{2}] \}^{-1/2} \cZ^{(\bpi)}(\bs_{\bpi}),
\end{equation*}
$\cZ^{(\bpi)}(\bs_{\bpi}) := {\bs_{\bbeta}'}\bZ^{(\bbeta)}+ m(s_{\mu}, s_{\sigma}) Z^{(\bupsilon)}$, and
\begin{equation*}
\left[\begin{array}{c} \bZ^{(\bbeta)} \\ Z^{(\bupsilon)} \end{array}\right] \sim N\left( \left[\begin{array}{c} \bzero\\ 0 \end{array}\right], \left[\begin{array}{cc} E[\bX_{t} \bX_{t}'] & E[\bX_{t}] \\ E[\bX_{t}'] & 1 \end{array}\right]\right).
\end{equation*}
Note that $[\bZ^{(\bbeta)'}, Z^{(\bupsilon)}]'$ is the weak limit of $n^{-1/2}\tau_{*}^{-1} \sum_{t=1}^{n}[ U_{t} \bX_{t}', U_{t}]'$. Theorem \ref{thm:2}(\emph{iv}) implies that $\cLR_{n}^{(1)}:=  2\{L_{n}(\widehat{\btheta}_{n}) - L_{n}(\btheta_{*})\} \Rightarrow \sup_{\bs_{\bupsilon} \in \Delta(\bupsilon_{*})}{\max[0,\widetilde{\cY}^{(\bupsilon)}(\bs_{\bupsilon})]^{2}} + \bZ^{(\bbeta)'} E[\bX_{t} \bX_{t}']^{-1}\bZ^{(\bbeta)} + \cH_{2}$, where for each $\bs_{\bupsilon_{*}} \in \Delta(\bupsilon_{*}):= \{ (s_{\mu}, s_{\sigma})' \in \mathbb{R}^{2}: s_{\mu}^{2} + s_{\sigma}^{2} = 1, s_{\mu} > 0, \;\text{and}\; s_{\sigma} > 0  \}$,
\begin{equation*}
\widetilde{\cY}^{(\bupsilon)}(\bs_{\bupsilon}):= (\widetilde{B}_{*}^{(\bupsilon,\bupsilon)}(\bs_{\bupsilon}))^{-1/2}\widetilde{\cZ}^{(\bupsilon)}(\bs_{\bupsilon}),
\end{equation*}
\begin{equation*}
\widetilde{B}_{*}^{(\bupsilon,\bupsilon)}(\bs_{\bupsilon}) := m(s_{\mu}, s_{\sigma})^{2} \{1- E[\bX_{t}]'E[\bX_{t} \bX_{t}']^{-1}
E[\bX_{t}]\},
\end{equation*}
and $\widetilde{\cZ}^{(\bupsilon)}(\bs_{\bupsilon}) := m(s_{\mu}, s_{\sigma})\{ Z^{(\bupsilon)} - E[\bX_{t}]' E[\bX_{t} \bX_{t}']^{-1}\bZ^{(\bbeta)} \}$. Furthermore, Theorem \ref{thm:2} shows that
\begin{equation*}
\cLR_{n}^{(2)}:= 2\{L_{n}(\ddot{\btheta}_{n}) - L_{n}(\btheta_{*})\} \Rightarrow \sup_{\bs_{\bbeta} \in \Delta(\bbeta_{*})} \max [0, \cY^{(\bbeta)}(\bs_{\bbeta})]^{2} + \cH_{2},
\end{equation*}
where for each $\bs_{\bbeta} \in \Delta(\bbeta_{*}) := \{ \bs_{\bbeta} \in \mathbb{R}^{k} : \bs_{\bbeta}'\bs_{\bbeta} = 1\}$, $\cY^{(\bbeta)}(\bs_{\bbeta}):= \{\bs_{\bbeta}'E[\bX_{t} \bX_{t}'] \bs_{\bbeta}  \}^{-1/2} \bZ^{(\bbeta)'} \bs_{\bbeta}$, and applying Theorem \ref{thm:2}(\emph{iii}) implies that $\cLR_{n}^{(2)}:= 2\{L_{n}(\ddot{\btheta}_{n}) - L_{n}(\btheta_{*})\} \Rightarrow \bZ^{(\bbeta)'} E[\bX_{t} \bX_{t}']^{-1}\bZ^{(\bbeta)} + \cH_{2}$. Therefore, Theorem \ref{thm:2}(\emph{iv}) now yields that
\begin{equation*}
\cLR_{n} \Rightarrow \sup_{\bs_{\bupsilon} \in \Delta(\bupsilon_{0})}\max\left[0, \frac{m(s_{\mu}, s_{\sigma})}{\vert m(s_{\mu}, s_{\sigma}) \vert} Z\right]^{2}
\end{equation*}
under $H_{0}$, where $Z := \{1- E[\bX_{t}]'E[\bX_{t} \bX_{t}']^{-1} E[\bX_{t}]\}^{-1/2}$ $\{ Z^{(\bupsilon)} - E[\bX_{t}]' E[\bX_{t} \bX_{t}']^{-1}\bZ^{(\bbeta)}\} \sim N(0,1)$. If we let $r(x):= \phi(x)/[x \Phi(x)]$,
\begin{equation*}
\frac{m(s_{\mu}, s_{\sigma})}{\vert m(s_{\mu}, s_{\sigma}) \vert} = -\frac{s_{\mu}}{\vert s_{\mu}\vert}  \left(\frac{1 + r(s_{\mu}/\vert s_{\sigma}\vert)}{\vert 1 + r(s_{\mu}/\vert s_{\sigma}\vert) \vert}\right),
\end{equation*}
which is $- 1$ uniformly on $\Delta(\bupsilon_{0})$. Thus, the null limit distribution reduces to $\max [0, - Z]^{2}$, and this implies that $\cLR_{n} \asim \max [0, -Z]^{2}$ under $H_{0}$.

We conduct simulations to verify this. We let $(\bX_{t}', U_{t})' \sim$ IID $N(\bzero_{2}, \bI_{2})$ and obtain the null limit distribution of the QLR test statistic by repeating the same independent experiments 2,000 times for $n=$ 50, $100$, and $200$. Simulation results are summarized in Figure \ref{fig:figure4} of this Supplement. Note that the null distributions of the QLR test statistics exactly overlap with that of $\max [0, -Z]^{2}$.

Second, we examine the Wald test. For this, if we let
\begin{equation*}
\widehat{W}_{n}(s_{\mu}, s_{\sigma}) := m(s_{\mu}, s_{\sigma})^{2} \left\{1 - n^{-1} \sum_{t=1}^{n} \bX_{t}' (n^{-1} \sum_{t=1}^{n} \bX_{t}\bX_{t}')^{-1} n^{-1} \sum_{t=1}^{n} \bX_{t}\right\},
\end{equation*}
the LLN implies that $\sup_{s_{\mu}, s_{\sigma}} \vert \widehat{W}_{n}(s_{\mu}, s_{\sigma}) - \widetilde{B}_{*}^{(\bupsilon, \bupsilon)}(s_{\mu}, s_{\sigma}) \vert \to 0$ a.s.$-\bbP$. In particular, $m(\,\cdot\,,\,\cdot\,)^2$ is bounded by 1 and $2/\pi$ from above and below, respectively. Using $\widehat{W}_{n}(s_{\mu}, s_{\sigma})$, we let the Wald test statistic be
\begin{equation*}
\cW_{n} := \sup_{s_{\mu}, s_{\sigma}} n\{\widetilde{h}_{n}^{(\bupsilon)}(s_{\mu}, s_{\sigma})\} \{\widehat{W}_{n}(s_{\mu}, s_{\sigma})\}\{\widetilde{h}_{n}^{(\bupsilon)}(s_{\mu}, s_{\sigma})\},
\end{equation*}
where $\widetilde{h}_{n}^{(\bupsilon)}(s_{\mu}, s_{\sigma})$ is such that for each $(s_{\mu}, s_{\sigma})$,
\begin{align*}
L_{n}(\widetilde{h}_{n}^{(\bupsilon)}(s_{\mu}, s_{\sigma}) s_{\mu}, & \widetilde{h}_{n}^{(\bupsilon)}(s_{\mu}, s_{\sigma}) s_{\sigma}, \widetilde{\bbeta}_{n}(s_{\mu}, s_{\sigma}), \widetilde{\tau}_{n}(s_{\mu}, s_{\sigma}))\\
& = \sup_{\{h^{(\bupsilon)}, \bbeta, \tau\}} L_{n}( h^{(\bupsilon)}(s_{\mu}, s_{\sigma}) s_{\mu}, h^{(\bupsilon)}(s_{\mu}, s_{\sigma})s_{\sigma}, \bbeta, \tau).
\end{align*}
Theorem \ref{thm:3} implies that $\cW_{n} \Rightarrow \sup_{\bs_{\bupsilon} \in \Delta(\bupsilon_{0})} \max[0, \widetilde{\cY}^{(\bupsilon)}(\bs_{\bupsilon})]^{2}$, and this weak limit is identical to that of the QLR test statistic. Thus, $\cW_{n} \asim \max [0, - Z]^{2}$ under $H_{0}$.

Finally, we investigate the LM test statistic. We let the LM test statistic be
\begin{equation*}
\cLM_{n} := \sup_{(s_{\mu}, s_{\sigma}, \bs_{\bbeta}) \in \Delta(\bupsilon_{0}) \times \Delta(\ddot{\bbeta}_{n})} n \widetilde{W}_{n}(s_{\mu}, s_{\sigma}, \bs_{\bbeta}) \max\left[0, \frac{-DL_{n}(\ddot{\btheta}_{n}; s_{\mu}, s_{\sigma})} {\widetilde{D}^{2}L_{n}(\ddot{\btheta}_{n}; s_{\mu}, s_{\sigma}, \bs_{\bbeta})}\right]^{2},
\end{equation*}
where $\ddot{\btheta}_{n} = (\ddot{\bbeta}_{n}, 0, 0, \ddot{\tau}_{n})$ with $\ddot{\bbeta}_{n} = (\sum_{t=1}^{n} \bX_{t} \bX_{t}')^{-1} \sum_{t=1}^{n} \bX_{t} Y_{t}$, $\ddot{\tau}_{n} = (n^{-1} \sum_{t=1}^{2} \ddot{U}_{t}^{2})^{1/2}$, $\ddot{U}_{t} := Y_{t} - \bX_{t}' \ddot{\bbeta}_{n}$, $\Delta(\ddot{\bbeta}_{n}) := \{ \bx \in \mathbb{R}^{k}: \bx'\bx= 1 \}$, $DL_{n}(\ddot{\btheta}_{n}; s_{\mu}, s_{\sigma}) = \{m(s_{\mu}, s_{\sigma})/\ddot{\tau}_{n}^{2}\} \sum_{t=1}^{n}\ddot{U}_{t}$, and
\begin{align*}
-\widetilde{D}^{2}L_{n}(\ddot{\btheta}_{n}; s_{\mu}, s_{\sigma}, \bs_{\bbeta}) = & \frac{1}{\ddot{\tau}_{n}^{4}} \sum_{t=1}^{n} \{s_{\sigma}^{2}(\ddot{\tau}_{n}^{2} - \ddot{U}_{t}^{2}) + \psi(s_{\mu}, s_{\sigma})^{2} \ddot{U}_{t}^{2} +\psi(s_{\mu}, s_{\sigma}) s_{\mu}(\ddot{U}_{t}^{2} + \ddot{\tau}_{n}^{2}) + s_{\mu}^{2} \ddot{\tau}_{n}^{2}\}\\
& - \frac{m(s_{\mu}, s_{\sigma})^{2}}{\ddot{\tau}_{n}^{2}} \sum_{t=1}^{n} \bs_{\bbeta}' \bX_{t} \left(\bs_{\bbeta}'\sum_{t=1}^{n} \bX_{t} \bX_{t}' \bs_{\bbeta}\right)^{-1} \sum_{t=1}^{n} \bX_{t}' \bs_{\bbeta}.
\end{align*}
In particular, applying the LLN implies that for each $(s_{\mu}, s_{\sigma})$,
\begin{equation*}
-\frac{1}{n}\widetilde{D}^{2}L_{n}(\ddot{\btheta}_{n}; s_{\mu}, s_{\sigma}, \bs_{\bbeta}) = \frac{m(s_{\mu}, s_{\sigma})^{2}}{\tau_{*}^{2}}\{ 1 - \bs_{\bbeta}' E[\bX_{t}] (\bs_{\bbeta}' E[\bX_{t} \bX_{t}'] \bs_{\bbeta})^{-1} E[\bX_{t}']\bs_{\bbeta} \} + o_{\bbP}(1).
\end{equation*}
This LLN holds uniformly on $\Delta(\bupsilon_{0}) \times \Delta(\ddot{\bbeta}_{n})$. Thus, for each $(s_{\mu}, s_{\sigma}, \bs_{\bbeta})$, we may let
\begin{equation*}
\widetilde{W}_{n}(s_{\mu}, s_{\sigma}, \bs_{\bbeta}) := \frac{m(s_{\mu}, s_{\sigma})^{2}}{\tau_{*}^{2}}\left\{ 1 - n^{-1} \sum_{t=1}^{n} \bs_{\bbeta}'\bX_{t} \left(\bs_{\bbeta}' n^{-1} \sum_{t=1}^{n}\bX_{t} \bX_{t}' \bs_{\bbeta}\right)^{-1} n^{-1} \sum_{t=1}^{n} \bX_{t}'\bs_{\bbeta} \right\}.
\end{equation*}
Here, applying the proof of Corollary \ref{cor:1}(\emph{vii}) implies that
\begin{align*}
\sup_{\bs_{\bbeta} \in \Delta(\ddot{\bbeta}_{n})} &n\widetilde{W}_{n}(s_{\mu}, s_{\sigma}, \bs_{\bbeta}) \max\left[0, \frac{-D L_{n}(\ddot{\btheta}_{n}; s_{\mu}, s_{\sigma})}{\widetilde{D}^{2} L_{n}(\ddot{\btheta}_{n}; s_{\mu}, s_{\sigma}, \bs_{\bbeta})} \right]^{2}\\
&= \max\left[0, \frac{m(s_{\mu}, s_{\sigma})}{\vert m(s_{\mu}, s_{\sigma}) \vert} \frac{n^{-1/2}\sum_{t=1}^{n} \ddot{U}_{t}}{ \{\tau_{*}^{2} (1 - E[\bX_{t}]' E[\bX_{t} \bX_{t}']^{-1} E[\bX_{t}]) \}^{1/2}} \right]^{2} + o_{\bbP}(1)
\end{align*}
by optimizing the objective function with respect to $\bs_{\bbeta}$, so that
\begin{equation*}
\cLM_{n} = \sup_{(s_{\mu}, s_{\sigma})\in \Delta(\bupsilon_{0})} \max\left[0, \frac{m(s_{\mu}, s_{\sigma})}{\vert m(s_{\mu}, s_{\sigma}) \vert} \frac{n^{-1/2}\sum_{t=1}^{n} \ddot{U}_{t}}{ \{\tau_{*}^{2} (1 - E[\bX_{t}]' E[\bX_{t} \bX_{t}']^{-1} E[\bX_{t}]) \}^{1/2}} \right]^{2} + o_{\bbP}(1)
\end{equation*}
under $H_{0}$. Therefore, $\cLM_{n} \asim \max[0, -Z]^{2}$ by noting that
\begin{equation*}
\frac{m(\,\cdot\,,\,\cdot\,)}{\vert m(\,\cdot\,,\,\cdot\,) \vert} = - 1
\end{equation*}
on $\Delta(\bupsilon_{0})$ and $n^{-1/2} \sum_{t=1}^{n} \ddot{U}_{t} \sim N[0, \tau_{*}^{2} (1 - E[\bX_{t}]' E[\bX_{t} \bX_{t}']^{-1} E[\bX_{t}])]$.

Before moving to the next example, some remarks are warranted. Here, we assume $\mu_{*} \geq 0$ so that $d_{\mu}$ is always greater than or equal to zero, and this is assumed to avoid the failure of numerical simulation. It is more general to suppose that $\mu_{*}$ can also be negative, so that for some positive $c >0$, $\mu_{*} \in [-c ,c]$. For such a case, for example, the null limit distribution of the QLR test is modified into
\begin{equation*}
\cLR_{n} \Rightarrow \sup_{\bs_{\bupsilon} \in \Delta(\bupsilon_{0})'} \max\left[ 0, \frac{m(s_{\mu}, s_{\sigma})}{\vert m(s_{\mu}, s_{\sigma}) \vert} Z\right]^{2},
\end{equation*}
where $\Delta(\bupsilon_{0})' := \{ (s_{\mu}, s_{\sigma}) \in \mathbb{R}^{2} : s_{\mu}^{2} + s_{\sigma}^{2} =1\;\text{and}\; s_{\sigma} > 0\}$.  Furthermore, it analytically follows that $m(s_{\mu}, s_{\sigma}) /\vert m( s_{\mu}, s_{\sigma}) \vert = -1$ uniformly on $\Delta(\bupsilon_{0})'$, so that $\cLR_{n} \Rightarrow \max[0, -Z]^{2}$, which is the same as for the case in which $\mu_{*} \geq 0$ is assumed. Nevertheless, Monte Carlo experiments showed that the empirical distribution of $\cLR_{n}$ exactly overlaps with that of $Z^{2}$ under the null.

This discrepancy arises mainly because the value of $m(s_{\mu}, s_{\sigma})$ sensitively responds to the value of $(s_{\mu}, s_{\sigma})$, so that we  obtain that $m(\,\cdot\,,\,\cdot\,) /\vert m(\,\cdot\,,\,\cdot\,) \vert = \pm 1$ numerically on $\Delta(\bupsilon_{0})'$, implying that
\begin{equation*}
\cLR_{n} \Rightarrow \sup_{\bs_{\bupsilon} \in \Delta(\bupsilon_{0})'} \max\left[ 0, \frac{m(s_{\mu}, s_{\sigma})}{\vert m(s_{\mu}, s_{\sigma}) \vert} Z\right]^{2} = \sup_{\bs_{\bupsilon} \in \Delta(\bupsilon_{0})'} \max\left[ 0, -Z, Z\right]^{2} = Z^{2}
\end{equation*}
as could be revealed by Monte Carlo experiments. More precisely, if $s_{\mu} < 0$ and $s_{\sigma} > 0$, so that we can let $s_{\mu} = -\sqrt{1-s_{\sigma}^{2}}$, it analytically follows that for any $s_{\sigma} > 0$, $m(-\sqrt{1-s_{\sigma}^{2}}, s_{\sigma}) < 0$, and $\lim_{s_{\sigma} \downarrow 0} m(-\sqrt{1-s_{\sigma}^{2}}, s_{\sigma}) = 0$. Nevertheless, computing $m(-\sqrt{1-s_{\sigma}^{2}}, s_{\sigma})$ requires a high level of precision when $s_{\sigma}$ is close to $0$. Standard statistical packages do not provide this level of precision. Numerically, most statistical packages compute $m(-\sqrt{1-s_{\sigma}^{2}}, s_{\sigma})$ that oscillates around zero as $s_{\sigma}$ converges to $0$, so that $m(\,\cdot\,,\,\cdot\,) /\vert m(\,\cdot\,,\,\cdot\,) \vert$ is obtained as $\pm 1$ on $\Delta(\bupsilon_{0})'$. We avoid this numerical failure by restricting our parameter space.

\subsection{Example 2: Box-Cox's (1964) Transformation}

Applying the directional derivatives makes model analysis more sensible for nonlinear models with irregular properties. Box and Cox's (\citeyear{Box1964}) transformation belongs to this case. We consider the following model:
\begin{equation}
Y_{t} = {{\bZ_{t}}'}\btheta_{0} +  \frac{\theta_{1}}{\theta_{2}} (X_{t}^{\theta_{2}} - 1) + U_{t}, \label{eq:sds}
\end{equation}
where $\{(Y_{t}, X_{t}, {{\bZ_{t}}'}) \in \mathbb{R}^{2+k} :t=1, 2, \cdots\}$ is assumed to be IID, $X_{t}$ is strictly greater than zero almost surely, and $U_{t} := Y_{t} - E[Y_{t} \vert \bZ_{t}, X_{t}]$. Furthermore, $\btheta := {{(\btheta_{0}', \theta_{1}, \theta_{2})}'} \in \bTheta_{0} \times \bTheta_{12}$, $\bTheta_{0}$ is a convex and compact set in $\mathbb{R}^{k}$, and
\begin{equation*}
\bTheta_{12} := \{(y,z) \in \mathbb{R}^{2}: \underline{c}y \leq z \leq \bar{c} y < \infty,\,0 < \underline{c} < \bar{c} < \infty,
\;\textrm{and}\; z^{2} + y^{2} \leq \bar{m} < \infty\}.
\end{equation*}
Our interests are in testing whether $X_{t}$ influences $E[Y_{t} \vert \bZ_{t}, X_{t}]$.

This model is introduced to avoid Davies's (\citeyear{Davies1977},\citeyear{Davies1987}) identification problem. If the Box-Cox transformation is specified in the conventional way as in Hansen (\citeyear{Hansen1996}) so that
\begin{equation*}
Y_{t} = {{\bZ_{t}}'}\btheta_{0} + \beta_{1} (X_{t}^{\gamma} - 1) + U_{t}
\end{equation*}
is assumed, then $\gamma_{*}$ is not identified when $\beta_{1*} = 0$, where the subscript `$*$' indicates the limit of the nonlinear least squares (NLS) estimator. We may instead examine another null hypothesis: $\gamma_{*} = 0$. Note that letting $\gamma_{*} = 0$ renders $\beta_{1*}$ unidentified.

We avoid the identification problem by reparameterizing the model using $\theta_{1}$ and $\theta_{2}$ as given in (\ref{eq:sds}). If $\theta_{2*} = 0$, $\theta_{1*}$ must be zero by the model condition on $\bTheta_{12}$, and the identification problem no longer arises.

Nevertheless, the reparameterized model becomes obscure by the null condition: $\theta_{1*} = 0$ and $\theta_{2*} = 0$. If so, the null model is not properly obtained from the given model specification. Note that $\theta_{1*} (X_{t}^{\theta_{2*}} - 1)/\theta_{2*} = 0 \times 0/0$, implying that the standard test statistics cannot be applied.

On the other hand, the directional limits are well defined, and they can be used to analyze the asymptotic behavior of the quasi-likelihood. For this purpose, we let $\bd = {{(\bd_{0}', d_{1}, d_{2})}'}$ and $\btheta_{*} = {{({{\btheta_{0*}}'}, 0, 0)}'}$ with $\btheta_{0*}$ interior to  $\bTheta_{0}$. The following quasi-likelihood is obtained from this:
\begin{equation*}
L_{n}(\btheta_{*} + h \bd) =  - \frac{1}{2}\sum_{t=1}^{n} \left\{ Y_{t} - {{\bZ_{t}}'}(\btheta_{0*} + \bd_{0} h) - \frac{d_{1}}{d_{2}} (X_{t}^{d_{2} h} - 1) \right\}^{2},
\end{equation*}
which is now D with respect to $h$ at $0$. Therefore, for each $\bd$, $\lim_{h \downarrow 0} L_{n}(\btheta_{*} + h \bd) = - \frac{1}{2}\sum_{t=1}^{n} \{ Y_{t} - {{\bZ_{t}}'}\btheta_{0*} \}^{2}$. The first two directional derivatives are \begin{equation}
DL_{n}(\btheta_{*};\bd) = \sum_{t=1}^{n} U_{t} \{{{\bZ_{t}}'}\bd_{0} + \log(X_{t})d_{1}\}, \;\;\;\text{and}\label{foderiv}
\end{equation}
\begin{equation}
D^{2}L_{n}(\btheta_{*};\bd) = - \sum_{t=1}^{n}\{{{\bZ_{t}}'}\bd_{0} + \log(X_{t}) d_{1}\}^{2} + \sum_{t=1}^{n} U_{t} \{\log(X_{t})\}^{2}d_{1} d_{2}, \label{soderiv}
\end{equation}
which are linear and quadratic in $(\bd_{0}, d_{2}, d_{2})$, respectively. Therefore, the model may be analyzed as if it is D, although the null model is not properly obtained from the given model.

As a remark regarding this model, this reformulation implies that there is a hidden identification problem associated with $d_{1}/d_{2}$. Note that $d_{1}/d_{2}$ lacks its corresponding distance and disappears if $h$ is zero, so that $d_{1}/d_{2}$ is not identified at $\btheta_{*} = {{({{\btheta_{0*}}'}, 0, 0)}'}$.

Using the first- and second-order directional derivatives in (\ref{foderiv}) and (\ref{soderiv}),
\begin{equation*}
n^{-1/2} DL_{n}(\btheta_{*}; \bd) \Rightarrow {{\ddot{\bd}}'}\bW\;\;\;\textrm{and}\;\;\;n^{-1} D^{2}L_{n}(\btheta_{*}; \bd) \to {{\ddot{\bd}}'} \bA_{*} \ddot{\bd}
\end{equation*}
a.s.$-\bbP$, where $\ddot{\bd} \in \ddot{\Delta}(\btheta_{*}):= \{\bx \in \mathbb{R}^{k+1}: \Vert \bx \Vert = 1 \}$, $\bW$ is a multivariate normal:
\begin{equation*}
\left[\begin{array}{c} n^{-1/2}\sum U_{t}{{\bZ_{t}}'} \\ n^{-1/2}\sum U_{t} \log(X_{t}) \end{array}\right] \Rightarrow \bW := \left[\begin{array}{c} {\bW_{0}}'\\ W_{1} \end{array} \right] \sim N(\bzero, \bB_{*})
\end{equation*}
with $\bB_{*}$ being a $(k+1) \times (k+1)$ positive definite matrix with a finite maximum eigenvalue, and
\begin{equation*}
\bA_{*} := \left[\begin{array}{cc} \bA_{*}^{(0,0)} & \bA_{*}^{(0,1)}\\ \bA_{*}^{(1,0)} & \bA_{*}^{(1,1)} \end{array} \right] := \left[ \begin{array}{cc} -E[\bZ_{t}{{\bZ_{t}}'}] & -E[\bZ_{t}\log(X_{t})]\\ -E[\log(X_{t}){{\bZ_{t}}'}] & -E[\log(X_{t})^{2}] \end{array} \right].
\end{equation*}
Here, we assume $E[\log(X_{t})^{2}] < \infty$ and for each $j$, $E[Z_{t,j}^{2}] < \infty$ to obtain the weak limit $\bW$.

We separate the set of directions into $\ddot{\Delta}(\btheta_{*})$ and the set for $d_{2}$ and derive the asymptotic distribution more efficiently. Through this separation, the maximization process is separated into a two-step maximization process:
\begin{align*}
2\{L_{n}(\widehat{\btheta}_{n}) - L_{n}(\btheta_{*}) \} &\Rightarrow \sup_{d_{2}} \sup_{\ddot{\bd} \in \ddot{\Delta}(\btheta_{*})} \max[0, {{\bW}'}\ddot{\bd}]^{2}\{-{{\ddot{\bd}}'}\bA_{*} \ddot{\bd}\}^{-1} \\ &= \sup_{\ddot{\bd} \in \ddot{\Delta}(\btheta_{*})} \max[0, {{\bW}'}\ddot{\bd}]^{2}\{-{{\ddot{\bd}}'}\bA_{*} \ddot{\bd}\}^{-1} = {{\bW}'}(-\bA_{*})^{-1} \bW
\end{align*}
by Theorem 1(\emph{iii}), where $\widehat{\btheta}_{n}$ is the NLS estimator, and applying the proof of Corollary \ref{cor:1}(\emph{vii}) obtains the last equality. Note that maximizing the limit with respect to $d_{2}$ is an innocuous process to obtaining the null limit distribution because $d_{2}$ vanishes at the limit. We note that the limit result is the same as what is obtained when an identified model is D.

%This result shows that the analysis of the current study can be used
%to show that the model is asymptotically identified. Although the
%quasi-likelihood is not properly identified under the null, it
%obtains the same result as obtained from D and identified models.

For data inference through this model, we let $\bpi = {{({{\blambda}'}, {{\bupsilon}'})}'}$ such that $\blambda = \btheta_{0}$ and $\bupsilon = \theta_{2}$, so that $\bOmega = \bTheta_{0}$, and $\bM$ is a closed interval with zero as an interior element. These parameters are introduced to follow the notational convention in Section 3 of Cho and White (\citeyear{Cho2017}). Note that $\theta_{1*} = 0$ if and only if $\theta_{2*} = 0$ from the model assumption. Using the model conditions, we can apply Theorem 2(\emph{iv}):
\begin{equation*}
\cLR_{n} \Rightarrow \sup_{\bs_{\bupsilon} \in \Delta(\bupsilon_{0})} \max[0,\widetilde{\cY}^{(\bupsilon)}(\bs_{\bupsilon}) ]^{2},
\end{equation*}
where $\bs_{\bupsilon} := s_{1}$, $\Delta(\bupsilon_{0}):= \{-1, 1 \}$, and
\begin{equation*}
\widetilde{\cY}^{(\bupsilon)}(\bs_{\bupsilon}) := \frac{s_{1}\widetilde{\cZ}^{(\bupsilon)}}{\vert s_{1} \vert (\widetilde{A}_{*}^{(\bupsilon,\bupsilon)})^{1/2}} := \frac{s_{1}(W_{1} - (-\bA_{*}^{(0,1)})(-\bA_{*}^{(0,0)})^{-1}\bW_{0})}{\vert s_{1} \vert \{(-A_{*}^{(1,1)})- (-A_{*}^{(1,0)})(-\bA_{*}^{(0,0)})^{-1}(-A_{*}^{(0,1)})\}^{1/2}}.
\end{equation*}
Note that $s_{1} / \vert s_{1} \vert = \pm 1$, and from this
\begin{equation*}
\cLR_{n} \Rightarrow \widetilde{\cZ}^{(\bupsilon)}(\widetilde{A}_{*}^{(\bupsilon,\bupsilon)})^{-1}\widetilde{\cZ}^{(\bupsilon)}.
\end{equation*}
\noindent In a similar way, we can apply Theorem 3 to the Wald test statistic. Note that $\sqrt{n} \widetilde{h}_{n}^{(\bmu)}(\bs_{\bupsilon}) \Rightarrow (\widetilde{A}_{*}^{(\bupsilon,\bupsilon)})^{-1}$ $\max[0, s_{1}\widetilde{\cZ}^{(\bupsilon)}]$, and select $\widehat{W}_{n}$ to be a consistent estimator for $(\widetilde{A}_{*}^{(\bupsilon,\bupsilon)})^{-1}$. For example, if we let
\begin{equation*}
\widehat{W}_{n} := \{ (n^{-1} \sum \log(X_{t})^{2}) - (n^{-1} \sum \log(X_{t}) {{\bZ_{t}}'}) (n^{-1} \sum \bZ_{t} {{\bZ_{t}}'})^{-1} (n^{-1} \sum \bZ_{t} \log(X_{t})) \}^{-1},
\end{equation*}
then
\begin{equation*}
\cW_{n} := n \{\widetilde{h}_{n}^{(\bupsilon)}(\bs_{\bupsilon})\} \{ \widehat{W}_{n} \} \{\widetilde{h}_{n}^{(\bupsilon)}(\bs_{\bupsilon})\} \Rightarrow \widetilde{\cZ}^{(\bupsilon)}(\widetilde{A}_{*}^{(\bupsilon,\bupsilon)})^{-1}\widetilde{\cZ}^{(\bupsilon)}
\end{equation*}
by Theorem 3. Finally, Theorem 4 obtains the same null limit distribution for the LM test statistic using the same weight function.

\subsection{Example 3: Generalized Method of Moments (GMM) }

Hansen (\citeyear{Hansen1982}) examined an estimation method by generalizing the method of moments estimation which requires differentiability as one of the regularity conditions. We consider the GMM estimator $\widehat{\btheta}_{n}$ obtained by maximizing
\begin{equation*}
Q_{n}(\btheta):= \bg_{n}{{(\bX^{n}; \btheta)}'} \,\{-\bM_{n}\}^{-1} \,\bg_{n}(\bX^{n}; \btheta)
\end{equation*}
with respect to $\btheta$, where $\{\bX_{t}: t= 1, 2, \cdots \}$ is a sequence of strictly stationary and ergodic random variables, $\bg_{n}(\bX^{n}; \btheta):= n^{-1} \sum_{t=1}^{n} \bq(\bX_{t}; \btheta)$ with $\bq_{t}:= \bq(\bX_{t};\,\cdot\, ): \bTheta \mapsto \mathbb{R}^{k}$ being D a.s.--$\bbP$ on $\bTheta$ given in Assumption \ref{assm:2} ($r \leq k$) such that for each $\btheta \in \bTheta$, $\bq(\,\cdot\,;\btheta)$ is measurable, and $\bM_{n}$ is a symmetric and positive definite random matrix a.s.--$\bbP$ uniformly in $n$ that converges to a symmetric and positive definite $\bM_{*}$ a.s.--$\bbP$. Furthermore, for some integrable $m(\bX_{t})$, $\Vert
\bq_{t}(\,\cdot\,) \Vert_{\infty} \leq m(\bX_{t})$ and $\Vert \nabla_{\btheta} \bq_{t}(\,\cdot\,) \Vert_{\infty} \leq m(\bX_{t})$, and there is a unique $\btheta_{*}$ which maximizes $E[\bq_{t}(\cdot)]' \{-\bM_{*}\}^{-1} E[\bq_{t}(\cdot)]$ on the interior part of $\bTheta$. We denote the uniform matrix norm by $\Vert \cdot \Vert_{\infty}$. We further suppose that $n^{1/2} \bg_{n}(\bX^{n}; \btheta_{*}) \Rightarrow \bW \sim N(\bzero,
\bS_{*})$ for some positive-definite matrix $\bS_{*}$. The GMM estimator is widely applied for empirical studies.

The given conditions for $Q_{n}(\cdot)$ do not exactly satisfy the conditions in Assumption \ref{assm:2}. Even so, our D-D analysis can be easily adapted to the GMM estimation framework.  Directional derivatives play a key role as before. We note that the first-order directional derivative of $\bg_{n}(\,\cdot\,):= \bg_{n}(\bX^{n};\,\cdot\,)$ is
\begin{equation}
D\bg_{n}(\btheta; \bd) = \nabla_{\btheta}\bg_{n}{{(\bX^{n}; \btheta)}'}\bd, \label{gmmdif1}
\end{equation}
where $\nabla_{\btheta}\bg_{n}(\bX^{n}; \btheta) := [\nabla_{\theta_{1}} g_{1,n}(\bX^{n}; \btheta), \cdots, \nabla_{\theta_{r}} g_{k,n}(\bX^{n}; \btheta)]'$ and $g_{j,n}(\bX^{n}; \btheta)$ is the $j$-th element of $\bg_{n}(\bX^{n}; \btheta)$. As (\ref{gmmdif1}) makes it clear, $D \bg_{n}(\btheta;\bd)$ is now linear with respect to $\bd$. Applying the mean-value theorem implies that for each $\bd$,
\begin{equation}
\bg_{n}(\btheta;\bd)= \bg_{n}(\btheta_{*};\bd) + D\bg_{n}(\bar{\btheta}; \bd)(\btheta - \btheta_{*}). \label{fdg}
\end{equation}
Here, $\bar{\btheta} := [\bar{\btheta}_{1}, \cdots, \bar{\btheta}_{r}]$ is the collection of the parameter values between $\btheta$ and $\btheta_{*}$, and $D\bg_{n}(\bar{\btheta}; \bd)$ denotes $[\nabla_{\theta_{1}} g_{1,n}(\bX^{n}; \bar{\btheta}_{1}), \cdots, \nabla_{\theta_{r}} g_{k,n}(\bX^{n}; \bar{\btheta}_{r})]' \bd$. Furthermore, $DQ_{n}(\btheta; \bd) = -2 \bd' \nabla_{\btheta} \bg_{n}(\btheta)' \bM_{n}^{-1} \bg_{n}(\btheta)$. This implies that for each $\bd$, $n^{1/2} DQ_{n}(\btheta_{*};\bd) \Rightarrow -2 \bd' \bC_{*}'\bM_{*}^{-1} \bW$ by the CLT. Here, we applied the LLN to obtain that $\nabla_{\btheta} \bg_{n}(\btheta_{*})$ converges to $\bC_{*} := E[\nabla_{\btheta}\bq_{t}(\btheta_{*})]$ a.s.$-\bbP$ by the fact that $\Vert \nabla_{\btheta} \bq_{t}(\,\cdot\,) \Vert_{\infty} \leq m(\bX_{t})$. We below use these facts and the vehicles for D-D analysis to obtain the asymptotic behavior of the GMM estimator.

Given (\ref{gmmdif1}), it is trivial to show that $\{n^{1/2} DQ_{n}(\btheta_{*};\,\cdot\,) \}$ is asymptotically tight by the fact that it is linear with respect to $\bd$. Next, we obtain that for some $\bar{\btheta}$ between $\btheta$ and $\btheta_{*}$,
\begin{equation*}
n\{Q_{n}(\btheta) - Q_{n}(\btheta_{*})\} = -2 {{\bd}'}{{\nabla_{\btheta}\bg_{n}(\bar{\btheta})}'} \bM_{n}^{-1} \sqrt{n}\bg_{n}(\btheta_{*})\sqrt{n} h - {{\bd}'} \nabla_{\btheta}\bg_{n}(\bar{\btheta})' \bM_{n}^{-1} \nabla_{\btheta}\bg_{n}(\bar{\btheta}) \bd (\sqrt{n}h)^{2}
\end{equation*}
by substituting $\bg_{n}$ in (\ref{fdg}) into $Q_{n}(\cdot)$, and so
\begin{equation*}
n\{Q_{n}(\widehat{\btheta}_{n}) - Q_{n}(\btheta_{*}) \} \Rightarrow \sup_{\bd}\sup_{h} - 2 {{\bd}'} {{\bC_{*}}'} \bM_{*}^{-1} \bW h - {{\bd}'} {{\bC_{*}}'} \bM_{*}^{-1} \bC_{*} \bd h^{2}.
\end{equation*}
We may let $\cZ(\bd):= - {{\bd}'} {{\bC_{*}}'} \bM_{*}^{-1} \bW$ and $A_{*}(\bd):= - {{\bd}'} {{\bC_{*}}'} \bM_{*}^{-1}$ $\bC_{*} \bd$. Note that these derivatives are linear and quadratic in $\bd$, respectively. Therefore,
\begin{equation*}
n\{Q_{n}(\widehat{\btheta}_{n}) - Q_{n}(\btheta_{*}) \} \Rightarrow {{\bW}'} \bM_{*}^{-1} \bC_{*} \{- {{\bC_{*}}'} \bM_{*}^{-1} \bC_{*} \}^{-1} {{\bC_{*}}'}\bM_{*}^{-1} \bW
\end{equation*}
by applying Corollary \ref{cor:1}(\emph{vii}). Furthermore, we obtain that
\begin{align*}
\sqrt{n}(\widehat{\btheta}_{n} - \btheta_{*}) \Rightarrow & -\{{{\bC_{*}}'} \bM_{*}^{-1} \bC_{*} \}^{-1} {{\bC_{*}}'} \bM_{*}^{-1} \bW\\ &\sim N(\bzero, \{{{\bC_{*}}'} \bM_{*}^{-1} \bC_{*} \}^{-1}\{{{\bC_{*}}'}\bM_{*}^{-1}\bS_{*}\bM_{*}^{-1}\bC_{*}\} \{{{\bC_{*}}'} \bM_{*}^{-1} \bC_{*} \}^{-1}).
\end{align*}
These are the same results as for the standard GMM literature (e.g., Newey and West, \citeyear{Newey1987}).

As the objective function is D, we simply let $\btheta = \bpi = (\bupsilon', \blambda')'$ for testing the hypothesis and follow the notational convention in Section 3 of Cho and White (\citeyear{Cho2017}). Note that the objective function $Q_{n}(\cdot)$ does not satisfy the condition in Assumption \ref{assm:2}, by which the definition of the QLR test statistic cannot be applied. Nevertheless, we similarly define a QLR test-like test statistic. We let
\begin{eqnarray*}
\cQLR_{n}:= n\{\sup_{\bupsilon, \blambda}Q_{n}(\bupsilon, \blambda) - \sup_{\blambda} Q_{n}(\bupsilon_{0}, \blambda)\}
\end{eqnarray*}
and let ${{\bC_{*}}'}\{-\bM_{*}\}^{-1}\bW$ and ${{\bC_{*}}'}\{-\bM_{*}\}^{-1} \bC_{*}$ be $\bZ^{(\bpi)}={{({\bZ^{(\bupsilon)}}', {\bZ^{(\blambda)}}')}'}$ and $\bA_{*}^{(\bpi,\bpi)}$ in Cho and White (\citeyear{Cho2017}), respectively. The null limit distribution of the QLR test statistic is obtained as
\begin{equation*}
\cQLR_{n} \Rightarrow {{(\widetilde{\bZ}^{(\bupsilon)})}'} (-\widetilde{\bA}_{*}^{(\bupsilon,\bupsilon)})^{-1}(\widetilde{\bZ}^{(\bupsilon)}),
\end{equation*}
where $\widetilde{\bZ}^{(\bupsilon)}:= \bZ^{(\bupsilon)} - (\bA_{*}^{(\bupsilon,\blambda)})(\bA_{*}^{(\blambda,\blambda)})^{-1} \bZ^{(\blambda)}$ and $\widetilde{\bA}_{*}^{(\bupsilon,\bupsilon)}:= \bA_{*}^{(\bupsilon,\bupsilon)} - (\bA_{*}^{(\bupsilon,\blambda)})(\bA_{*}^{(\blambda,\blambda)})^{-1} {{(\bA_{*}^{(\blambda,\bupsilon)})}'}$ by applying Corollary \ref{cor:1}.

We can define the Wald test statistic using the GMM estimator and derive its null limit distribution as before. That is,
\begin{equation*}
\cQW_{n}:= \sup_{\bs_{\bupsilon} \in \Delta(\bupsilon_{0})} n\{\widetilde{h}_{n}^{(\bupsilon)}(\bs_{\bupsilon})\} \{\widehat{W}_{n}(\bs_{\bupsilon})\}\{\widetilde{h}_{n}^{(\bupsilon)}(\bs_{\bupsilon})\},
\end{equation*}
where $\widetilde{h}_{n}^{(\bupsilon)}(\bs_{\bupsilon})$ is such that for each $\bs_{\bupsilon} \in \Delta(\bupsilon_{0})$,
\begin{equation*}
Q_{n}(\bupsilon_{0} + \widetilde{h}_{n}^{(\bupsilon)}(\bs_{\bupsilon}) \bs_{\bupsilon}, \widetilde{\blambda}_{n}(\bs_{\bupsilon})) := \sup_{\{h^{(\bupsilon)}, \blambda \}} Q_{n}(\bupsilon_{0} + h^{(\bupsilon)}\bs_{\bupsilon}, \blambda),
\end{equation*}
and its null limit distribution is obtained by applying Theorem 3. Note that the definition of $\cQW_{n}$ is exactly the same as $\cW_{n}$ except that $\widetilde{h}_{n}^{(\bupsilon)}(\bs_{\bupsilon})$ is defined using $Q_{n}(\cdot)$ instead of $L_{n}(\cdot)$. If we further let the weight function $\widehat{W}_{n}(\bs_{\bupsilon})$ be ${{\bs_{\bupsilon}}'} \widehat{W}_{n} \bs_{\bupsilon}$ such that $\widehat{W}_{n}$ converges to $-\widetilde{\bA}_{*}^{(\bupsilon,\bupsilon)}$ a.s.$-\bbP$,
\begin{equation*}
\cQW_{n} \Rightarrow \sup_{\bs_{\bupsilon}\in\Delta(\bupsilon_{0})} \max[0, {{\bs_{\bupsilon}}'}\widetilde{\bZ}^{(\bupsilon)}](-{{\bs_{\bupsilon}}'}\widetilde{\bA}_{*}^{(\bupsilon,\bupsilon)}\bs_{\bupsilon})^{-1} \max[0, {{\bs_{\bupsilon}}'}\widetilde{\bZ}^{(\bupsilon)}].
\end{equation*}
The proof of Corollary \ref{cor:1}(\emph{vii}) corroborates that the null limit distribution of $\cQW_{n}$ is equivalent to that of $\cQLR_{n}$ particularly because $\bupsilon_{0}$ is an interior element.

Finally, we define the LM test statistic in the GMM context and examine its null limit distribution. For this purpose, we let
\begin{equation*}
\cQLM_{n} := \sup_{(\bs_{\bupsilon}, \bs_{\blambda}) \in \Delta(\bupsilon_{0}) \times \Delta(\ddot{\blambda}_{n})} n \widetilde{W}_{n}(\bs_{\bupsilon}, \bs_{\blambda}) \max\left[0, \frac{DQ_{n}(\ddot{\btheta}_{n}; \bs_{\bupsilon})}  {2\widetilde{D}^{2}Q_{n}(\ddot{\btheta}_{n}; \bs_{\bupsilon}, \bs_{\blambda})}\right]^{2},
\end{equation*}
where for each $(\bs_{\bupsilon}, \bs_{\blambda})$,
\begin{align*}
\widetilde{D}^{2}Q_{n}&(\ddot{\btheta}_{n}; \bs_{\bupsilon}, \bs_{\blambda}) := D\bg_{n}(\ddot{\btheta}_{n};  \bs_{\bupsilon})'\{-\bM_{n}\}^{-1}D\bg_{n}(\ddot{\btheta}_{n}; \bs_{\bupsilon})\\ &-D\bg_{n}(\ddot{\btheta}_{n}; \bs_{\bupsilon})'\{-\bM_{n}\}^{-1}D\bg_{n}(\ddot{\btheta}_{n}; \bs_{\blambda})\{D\bg_{n}(\ddot{\btheta}_{n}; \bs_{\blambda})'\{-\bM_{n}\}^{-1}D\bg_{n}(\ddot{\btheta}_{n}; \bs_{\blambda})\}^{-1}\\
&\;\;\;\times D\bg_{n}(\ddot{\btheta}_{n}; \bs_{\blambda})'\{-\bM_{n}\}^{-1}D\bg_{n}(\ddot{\btheta}_{n}; \bs_{\bupsilon}),
\end{align*}
and $\ddot{\btheta}_{n}:= (\bupsilon_{0}, \ddot{\blambda}_{n})$ such that $\ddot{\blambda}_{n} := \arg\max_{\blambda} Q_{n}(\bupsilon_{0}, \blambda)$. If we let $\widetilde{W}_{n}(\bs_{\bupsilon}, \bs_{\blambda}) = {{\bs_{\bupsilon}}'}\widehat{W}_{n}\bs_{\bupsilon}$ for each $(\bs_{\bupsilon}, \bs_{\blambda}) \in \Delta(\bupsilon_{0}) \times \Delta(\blambda_{*})$,
\begin{equation*}
\cQLM_{n} \Rightarrow {{(\widetilde{\bZ}^{(\bupsilon)})}'}
(-\widetilde{\bA}_{*}^{(\bupsilon,\bupsilon)})^{-1}(\widetilde{\bZ}^{(\bupsilon)})
\end{equation*} by Theorem 4, the interiority condition of $\bupsilon_{0}$, and the proof of Corollary \ref{cor:1}(\emph{vii}), where $\widehat{W}_{n}$ is the weight matrix used for $\cQW_{n}$.

Indeed, many other nonlinear models share the D-D model features in addition to the models we examined here. For example, table 1 of Cheng, Evans, and Iles (\citeyear{Cheng1992}) collects a number of nonlinear models with parameter instability problems. Many of them can be analyzed using the approach of the current study. Furthermore, D-D analysis simplifies dimensional complexities that arise when higher-order approximations are necessary for model analysis. Cho, Ishida, and White (\citeyear{Cho2011b}, \citeyear{Cho2014}) and White and Cho (\citeyear{White2012}) revisit testing neglected nonlinearity using artificial neural networks, and it requires higher-order model approximations. They resolve the relevant issues by applying the D-D model analysis of this study.

\section{Differentiable Model and Directionally Differentiable Model}

In this section, we provide sufficient conditions for a twice D-D function to be twice D.

\begin{theorem}
\label{thm1} If a function $f: \bTheta \mapsto \mathbb{R}$ is (i) D-D on $\bTheta$; (ii) for each $\btheta, {\btheta'}$ and for some $M < \infty$, $\vert D f({\btheta'}; \bd) - D f(\btheta; \bd) \vert \leq M \Vert {\btheta'} - \btheta \Vert$ uniformly on $\Delta(\btheta) \cap\Delta({\btheta'})$; and (iii) for each $\btheta \in \bTheta$, $Df(\btheta; \bd)$ is linear in $\bd \in \Delta(\btheta)$, then $f: \bTheta \mapsto \mathbb{R}$ is D on $\bTheta$. $\wbox$
\end{theorem}

\bigskip\noindent\textbf{Proof of Theorem \ref{thm1}}: Refer to Troutman (\citeyear{Troutman1996}, p. 122). $\bbox$

\begin{theorem}
\label{thm2} In addition to the conditions in Theorem \ref{thm1}, if a function $f: \bTheta \mapsto \mathbb{R}$ is (i) twice D-D on $\bTheta$; (ii) for each $\btheta, {\btheta'}$ and for some $M < \infty$, $\vert D^{2} f({\btheta'}; \wtilde{\bd}; \bd) - D^{2} f(\btheta; \wtilde{\bd}; \bd) \vert \leq M \Vert {\btheta'} - \btheta \Vert$ uniformly on $\Delta(\btheta) \cap\Delta({\btheta'}) \times \Delta(\btheta) \cap\Delta({\btheta'})$; and (iii) for each $\btheta \in \bTheta$, the directional derivative of $Df(\btheta; \bd)$ with respect to $\wtilde{\bd}$ is linear in $\wtilde{\bd} \in \Delta(\btheta)$, then $f: \bTheta \mapsto \mathbb{R}$ is twice D on $\bTheta$. $\wbox$
\end{theorem}

\bigskip\noindent\textbf{Proof of Theorem \ref{thm2}}: To show the given claim, we note that $f(\cdot)$ is differentiable on $\bTheta$ by Theorem \ref{thm1} and denote the gradient of $f(\cdot)$ as $\bA(\cdot)$. We next show that for some $\bB(\cdot)$,
\begin{equation*}
\lim_{\Vert \wtilde{\btheta} - \btheta_{0} \Vert \to 0}\sup_{\Vert \btheta - \btheta_{0} \Vert = 1} \frac{1}{\Vert \wtilde{\btheta} - \btheta_{0} \Vert} \left\vert \bA(\wtilde{\btheta})' (\btheta - \btheta_{0}) - \bA(\btheta_{0})' (\btheta - \btheta_{0}) - (\wtilde{\btheta} - \btheta_{0})' \bB(\btheta_{0})(\btheta - \btheta_{0}) \right\vert = 0.
\end{equation*}
%Here, we note that
%\begin{align*}
%\sup_{\Vert \btheta - \btheta_{0} \Vert = 1} & \left\vert
%\bA(\wtilde{\btheta})' (\btheta - \btheta_{0}) - \bA(\btheta_{0})'
%(\btheta - \btheta_{0}) - (\wtilde{\btheta} - \btheta_{0})'
%\bB(\btheta_{0})(\btheta -
%\btheta_{0}) \right\vert \\
%& \leq \sup_{\Vert \btheta - \btheta_{0} \Vert = 1} \left\Vert
%\bA(\wtilde{\btheta}) - \bA(\btheta_{0}) - \bB(\btheta_{0})
%(\wtilde{\btheta}
%- \btheta_{0}) \right\Vert \cdot \Vert \btheta - \btheta_{0} \Vert\\
% & \;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;\;
% = \left\Vert \bA(\wtilde{\btheta}) - \bA(\btheta_{0}) -
%\bB(\btheta_{0}) (\wtilde{\btheta} - \btheta_{0}) \right\Vert.
%\end{align*}
%Therefore, if for some $\bB(\cdot)$,
%\begin{equation*}
%\lim_{\Vert \wtilde{\btheta} - \btheta_{0} \Vert \to 0}
%\frac{1}{\Vert \wtilde{\btheta} - \btheta_{0} \Vert} \left\Vert
%\bA(\wtilde{\btheta}) - \bA(\btheta_{0}) - \bB(\btheta_{0})
%(\wtilde{\btheta} - \btheta_{0}) \right\Vert = 0,
%\end{equation*}
%the desired result follows.

If we let $g(h) := f(\btheta_{0} + h \bd)$, $g(\cdot)$ is twice D from the given condition, so that we can apply the mean-value theorem: for some $\bar{h} \geq 0$
\begin{equation*}
g'(h) = g'(0) + g''(\bar{h}) h,
\end{equation*}
implying that $Df(\btheta_{0} + h\wtilde{\bd} ; \bd)  = Df(\btheta_{0};\bd) + {D}^{2}f(\btheta_{0}; \bd; \wtilde{\bd}) h \bar{h}$, where
\begin{equation*}
{D}^{2}f(\btheta_{0}; \wtilde{\bd}; \bd) := \lim_{h \downarrow 0} \frac{Df(\btheta_{0} + h \wtilde{\bd};\bd) - Df(\btheta_{0} ;\bd)}{h} .
\end{equation*}
Given this, note that the given conditions imply that $Df(\btheta_{0}; \bd) = \bA(\btheta_{0})' \bd$ and ${D}^{2}f(\btheta_{0}; \wtilde{\bd}; \bd) = \wtilde{\bd}' \bB(\btheta_{0}) \bd$. Therefore, if we let $\wtilde{\btheta}:= \btheta_{0} + h \wtilde{\bd}$, then $\bA(\wtilde{\btheta})' \bd  = \bA(\btheta_{0})'\bd + h\wtilde{\bd}' \bB(\btheta_{0} + \bar{h} \wtilde{\bd}) \bd$, so that
\begin{equation*}
\bA(\wtilde{\btheta})' \bd  - \bA(\btheta_{0})'\bd  - h \wtilde{\bd} '\bB(\btheta_{0}) \bd \leq  h \wtilde{\bd} '\bB(\btheta_{0} + \bar{h}\wtilde{\bd} ) \bd - h \wtilde{\bd} '\bB(\btheta_{0}) \bd,
\end{equation*}
implying that
\begin{equation*}
\frac{1}{h} \vert \bA(\wtilde{\btheta})' \bd  - \bA(\btheta_{0})'\bd - h \wtilde{\bd} '\bB(\btheta_{0}) \bd \vert \leq \vert \wtilde{\bd}' [\bB(\btheta_{0} + \bar{h} \wtilde{\bd}) - \bB(\btheta_{0}) ] \bd\vert \leq M \cdot \Vert \wtilde{\btheta} - \btheta_{0} \Vert,
\end{equation*}
where the last inequality follows from the uniform bound condition. We further note that $h = \Vert \wtilde{\btheta} - \btheta_{0} \Vert$. This implies that
\begin{equation*}
\lim_{\Vert \wtilde{\btheta} - \btheta_{0} \Vert \to 0} \frac{1}{\Vert \wtilde{\btheta} - \btheta_{0} \Vert} \vert \bA(\wtilde{\btheta})' \bd  - \bA(\btheta_{0})'\bd - h \wtilde{\bd} '\bB(\btheta_{0}) \bd \vert \leq \lim_{\Vert \wtilde{\btheta} - \btheta_{0} \Vert \to 0}  M \cdot \Vert \wtilde{\btheta} - \btheta_{0} \Vert =0.
\end{equation*}
This completes the proof. $\bbox$

\begin{thebibliography}{90}

\harvarditem{Aigner}{1977}{Aigner1977} \textsc{Aigner, D., Lovell, C., and Schmidt, P.} (1977): ``Formulation and Estimation of Stochastic Frontier Production Function Models,'' \emph{Journal of Econometrics}, 6, 21--37.

%\harvarditem{Andrews}{1994}{Andrews1994} \textsc{Andrews, D.} (1994): ``Empirical Process Methods in Econometrics,'' in R. Engle and D. McFadden, eds. \emph{Handbook of Econometrics}, 4, 2247--2294.

%\harvarditem{Andrews}{1999}{Andrews1999} \textsc{Andrews, D.} (1999): ``Estimation When a Parameter is on a Boundary,'' \emph{Econometrica}, 67, 543--563.

%\harvarditem{Andrews}{2001}{Andrews2001} \textsc{Andrews, D.} (2001): ``Testing When a Parameter is on the Boundary of the Maintained Hypothesis,'' \emph{Econometrica}, 69, 683--734.

%\harvarditem{Baek2015}{2015}{Baek2015} \textsc{Baek, Y., Cho, J.S., and Phillips, P.C.B.} (2015): ``Testing Linearity Using Power Transforms of Regressors,'' \emph{Journal of Econometrics}, 187, 376--384.

%\harvarditem{Bates}{1985}{Bates1985} \textsc{Bates, C. and White, H.} (1985): ``A Unified Theory of Consistent Esetimation for Parametric Model,'' \emph{Econometric Theory}, 1, 151--178.

%\harvarditem{Billingsley}{1999}{Billingsley1999} \textsc{Billingsley, P.} (1999): \emph{Convergence of Probability Measures}. New York: Wiley.

\harvarditem{Box}{1964}{Box1964} \textsc{Box, G. and Cox, D.} (1964): ``An Analysis of Transformations,'' \emph{Journal of the Royal Statistical Society}, \emph{Series B}, 26, 211--252.

%\harvarditem{Chernoff}{1954}{Chernoff1954} \textsc{Chernoff, H.} (1954): ``On the Distribution of the Likelihood Ratio,'' \emph{The Annals of  Mathematical Statistics}, 54, 573--578.

\harvarditem{Cheng}{1992}{Cheng1992} \textsc{Cheng, R., B. Evans, and T. Iles.} (1992): ``Embedded Models in Non-Linear Regression,'' \emph{Journal of the Royal Statistical Society}, \emph{Series B}, 54, 877--888.

%\harvarditem{Cho}{2011}{Cho2011} \textsc{Cho, J.S.} (2011): ``Quasi-Maximum Likelihood Estimation Revisited Using the Distance and Direction Method,''  \emph{Journal of Economic Theory and Econometrics}, 23, 89--112.

%\harvarditem{ChoIshida}{2012}{ChoIshida2012} \textsc{Cho, J.S. and Ishida, I.} (2012): %``Testing for the Effects of Omitted Power Transformations,'' \emph{Economics %Letters}, 117, 287--290.

%\harvarditem{Cho}{2007}{Cho2007} \textsc{Cho, J.S. and White, H.} (2007): ``Testing for Regime Switching,'' \emph{Econometrica}, 75, 1671--1720.

%\harvarditem{Cho}{2010}{Cho2010} \textsc{Cho, J.S. and White, H.} (2010): ``Testing for Unobserved Heterogeneity in Exponential and Weibull Duration Models,'' \emph{Journal of Econometrics}, 157, 458--480.

%\harvarditem{Cho}{2011a}{Cho2011a} \textsc{Cho, J.S. and White, H.} (2011): ``Generalized Runs Tests for the IID Hypothesis,'' \emph{Journal of Econometrics}, 162, 326--344.

\harvarditem{Cho}{2017}{Cho2017} \textsc{Cho, J.S. and White, H.} (2017): ``Directionally Differentiable Econometric Models,'' Discussion Paper, School of Economics, Yonsei University.

%\harvarditem{Cho}{2011}{Cho2011c} \textsc{Cho, J.S., Cheong, T., and White, H.} (2011): ``Experience with the Weighted Bootstrap in Testing for  Unobserved Heterogeneity in Exponential and Weibull Duration Models,'' \emph{Journal of Economic Theory and Econometrics}, 22:2, 60--91.

\harvarditem{Cho}{2011}{Cho2011b} \textsc{Cho, J.S., Ishida, I., and White, H.} (2011): ``Revisiting Tests for Neglected Nonlinearity Using Artificial Neural Networks,'' \emph{Neural Computation}, 23, 1133--1186.

\harvarditem{Cho}{2014}{Cho2014} \textsc{Cho, J., Ishida, I., and White, H.} (2014): ``Testing for Neglected Nonlinearity Using Twofold Unidentified Models under the Null and Hexic Expansions,'' \emph{Essays in Nonolinear Time Series Econometrics}. 3--27. Oxford: Oxford University Press.

\harvarditem{Davies}{1977}{Davies1977} \textsc{Davies, R.} (1977): ``Hypothesis Testing When a Nuisance Parameter is Present Only under the Alternative,'' \emph{Biometrika}, 64, 247--254.

\harvarditem{Davies}{1987}{Davies1987} \textsc{Davies, R.} (1987): ``Hypothesis Testing When a Nuisance Parameter is Present Only under the Alternative,'' \emph{Biometrika}, 74, 33--43.

%\harvarditem{Doukhan}{1995}{Doukhan1995} \textsc{Doukhan, P., P. Massart, and E. Rio.} (1995): ``Invariance Principles for Absolutely Regular Empirical Processes,'' \emph{Annales de l'Institut Henri Poincar\'{e}, Probabilites et Statistiques}, 31, 393--427.

\harvarditem{Dufour}{2006}{Dufour2006} \textsc{Duofur, J.-M.} (2006): ``Monte Carlo Tests with Nuisance Parameters: A General Approach to Fininte-Sample Inference and Nonstandard Asymptotics in Ecoonometrics,'' \emph{Journal of Econometrics}, 133, 443--477.

\harvarditem{Dutta}{1999}{Dutta1999} \textsc{Dutta, S., Narasimhan, O., and Rajiv, S.} (1999): ``Success in High-Technology Markets: Is Marketing Capability Critical?'' \emph{Marketing Science}, 18, 547--598.

%\harvarditem{Gallnat}{1988}{Gallant1988} \textsc{Gallant, R. and White, H.} (1988): \emph{A Unified Theory of Estimation and Inference for Nonlinear Dynamic Models}. New York: Basil Blackwell.

\harvarditem{Habib}{2005}{Habib2005} \textsc{Habib, M. and Ljungqvist, A.} (2005): ``Firm Values and Managerial Incentives: A Stochastic Frontier Approach,'' \emph{Journal of Business}, 78, 2053--2094.

\harvarditem{Hansen}{1982}{Hansen1982} \textsc{Hansen, L.} (1982): ``Large Sample Properties of Generalized Method of Moments Estimators,'' \emph{Econometrica}, 50, 1029--1054.

%\harvarditem{Hansen}{1996a}{Hansen1996a} \textsc{Hansen, B.} (1996a): ``Stochastic Equicontinuity for Unbounded Dependent Heterigeneous Arrays,'' \emph{Econometric Theory}, 12, 347--359.

\harvarditem{Hansen}{1996}{Hansen1996} \textsc{Hansen, B.} (1996): ``Inference When a Nuisance Parameter Is Not Identified under the Null Hypothesis,'' \emph{Econometrica}, 64, 413--440.

%\harvarditem{King}{1993}{King1993} \textsc{King, M. and T. Shively.} (1993): ``Locally Optimal Testing When a Nuisance Parameter is Present Only Under the Alternative,'' \emph{The Review of Economics and Statistics}, 75, 1--7.

\harvarditem{McLeish}{1974}{McLeish1974} \textsc{McLeish, D.} (1974): ``Dependent Central Limit  Theorem and Invariance Principles,'' \emph{The Annals of Probability}, 2, 620--628.

\harvarditem{Newey}{1987}{Newey1987} \textsc{Newey, W. and West, K.} (1987): ``Hypothesis Testing with Efficient Menthod of Moment Estimation,``  \emph{International Economic Review}, 28, 777--787.

%\harvarditem{Rosenberg}{1973}{Rosenberg1973} \textsc{Rosenberg, B.} (1973): ``The Analysis of a Cross-Section of Time Series by Stochastically Convergent Parameter Regression,'' \emph{Annals of Economic and Social Measurement}, 2, 399--428.

%\harvarditem{Rudin}{1976}{Rudin1976} \textsc{Rudin, W.} (1976): \emph{Principles of Mathematical Analysis}. New York: McGraw-Hill.

%\harvarditem{Scott}{1973}{Scott1973} \textsc{Scott, D.} (1973): ``Central Limit Theorems for Martingales and for Processes with Stationary Increments Using a Skorohkod Representation Approach,'' \emph{Advances in Applied Probability}, 5. 119--137.

%\harvarditem{Self}{1987}{Self1987} \textsc{Self, S. and Liang, K.} (1987): ``Asymptotic Properties of Maximum Likelihood Estimators and Likelihood Ratio Tests under Nonstandard Conditions,'' \emph{Journal of the American Statistical Association}, 82, 605--610.

\harvarditem{Stevenson}{1980}{Stevenson1980} \textsc{Stevenson, R.} (1980): ``Likelihood Functions for Generalized Stochastic Frontier Estimation,'' \emph{Journal of Econometrics}, 13, 57--66.

%\harvarditem{Stout}{1974}{Stout1974} \textsc{Stout, W.} (1974): \emph{Almost Sure Convergence}. New York: Academic Press.

\harvarditem{Troutman}{1996}{Troutman1996} \textsc{Troutman, J.} (1996): \emph{Variational Calculus and Optimal Control}. New York: Springer-Verlag.

%\harvarditem{Vaart}{1996}{Vaart1996} \textsc{van der Vaart, A. and Weller, J.} (1996): \emph{Weak Convergence and Empirical Processes with Applications to Statistics}. New York: Springer-Verlag.

%\harvarditem{Wald}{1943}{Wald1943} \textsc{Wald, A.} (1943): ``Tests of Statistical Hypotheses Concerning Several Parameters When the Number of Observations is Large,'' \emph{Transactions of the American Mathematical Society}, 54, 426--482.

%\harvarditem{Wald}{1949}{Wald1949} \textsc{Wald, A.} (1949): ``Note on the Consistency of the Maximum Likelihood Estimate,'' \emph{The Annals of Mathematical Statistics}, 20, 596--601.

%\harvarditem{White}{1994}{White1994} \textsc{White, H.} (1994): \emph{Estimation, Inference, and Specification Analysis}. New York: Cambridge University Press.

%\harvarditem{White}{2001}{White2001} \textsc{White, H.} (2001): \emph{Asymptotic Theory for Econometricians}. Orlando: Academic Press.

\harvarditem{White}{2012}{White2012} \textsc{White, H. and Cho, J.S.} (2012): ``Higher-Order Approximations for Testing Neglected Nonlinearity,'' \emph{Neural Computation}, 24, 273--287.

%\harvarditem{Wooldridge}{1988}{Wooldridge1988} \textsc{Wooldridge, J. and White, H.} (1988): ``Some Invariance Principles and Central Limit Theorems for Dependent Heterogeneous Processes,'' \emph{Econometric Theory}, 4, 210--230.

\harvarditem{Zellner}{1966}{Zellner1966} \textsc{Zellner, A., Kmenta, J., and Dr\`{e}ze, J.} (1966): ``Specification and Estimation of Cobb-Douglas Production Functions,'' \emph{Econometrica}, 34, 784--795.

\end{thebibliography}

\include{Figures}

\begin{figure}[b]
  \centering
  \includegraphics[scale=.35,angle=0]{figure4}
  \caption{\textsc{Empirical and Asymptotic Distributions of the QLR Test Statistic}. This figure shows the null limit distribution of the QLR test statistic, which is obtained as $\max[0, -Z]^{2}$, and the empirical distributions of the QLR test statistic for various sample sizes: $n= 50$, 100, and 500. The number of iterations for obtaining the empirical distributions is 2,000. We can see that the empirical distributions almost overlap with the null limit distribution even when the sample size is as small as 50.}   \label{fig:figure4}
\end{figure}

\end{document}