We study the problem of estimating the smallest achievable mean-squared
error in regression function estimation. The problem is equivalent to
estimating the second moment of the regression function of Y on
$X \in \mathbb{R}^d$. We introduce a nearest-neighbor-based estimate and
obtain a normal limit law for the estimate when X has an absolutely
continuous distribution, without any condition on the density. We also
compute the asymptotic variance explicitly and derive a non-asymptotic
bound on the variance that does not depend on the dimension d. The
asymptotic variance does not depend on the smoothness of the density of
X or of the regression function. A non-asymptotic exponential
concentration inequality is also proved. We illustrate the use of the
new estimate through testing whether a component of the vector X carries
information for predicting Y.
Devroye, L (Corresponding Author), McGill Univ, Sch Comp Sci, 3480 Univ St, Montreal, PQ H3A 0E9, Canada.
Devroye, Luc, McGill Univ, Sch Comp Sci, 3480 Univ St, Montreal, PQ H3A 0E9, Canada.
Gyorfi, Laszlo, Budapest Univ Technol & Econ, Dept Comp Sci & Informat Theory, Magyar Tudosok Krt 2, H-1117 Budapest, Hungary.
Lugosi, Gabor, Pompeu Fabra Univ, Dept Econ & Business, Pg Lluis Companys 23, Barcelona 08010, Spain.
Lugosi, Gabor, ICREA, Pg Lluis Companys 23, Barcelona 08010, Spain.
Lugosi, Gabor, Barcelona Grad Sch Econ, Barcelona, Spain.
Walk, Harro, Univ Stuttgart, Inst Stochast & Anwendungen, Pfaffenwaldring 57, D-70569 Stuttgart, Germany.
usage-count-last-180-days
0
web-of-science-categories
Statistics & Probability
number-of-cited-references
21
research-areas
Mathematics
funding-text
Luc Devroye was supported by the Natural Sciences and Engineering
Research Council (NSERC) of Canada.; Laszlo Gyorfi was supported by the
National University of Public Service under the priority project
KOFOP-2.1.2-VEKOP-15-2016-00001 titled ``Public Service Development
Establishing Good Governance'' in the Ludovika Workshop.; Gabor Lugosi
was supported by the Spanish Ministry of Economy and Competitiveness,
Grant MTM2015-67304-P and FEDER, EU.
funding-acknowledgement
Natural Sciences and Engineering Research Council of Canada
(NSERC); National University of Public Service
KOFOP-2.1.2-VEKOP-15-2016-00001; Spanish Ministry of Economy and
Competitiveness MTM2015-67304-P; FEDER, EU
%0 Journal Article
%1 WOS:000438839900052
%A Devroye, Luc
%A Gyorfi, Laszlo
%A Lugosi, Gabor
%A Walk, Harro
%C 3163 SOMERSET DR, CLEVELAND, OH 44122 USA
%D 2018
%I INST MATHEMATICAL STATISTICS
%J ELECTRONIC JOURNAL OF STATISTICS
%K imported
%N 1
%P 1752-1778
%R 10.1214/18-EJS1438
%T A nearest neighbor estimate of the residual variance
%V 12
%X We study the problem of estimating the smallest achievable mean-squared
error in regression function estimation. The problem is equivalent to
estimating the second moment of the regression function of Y on
$X \in \mathbb{R}^d$. We introduce a nearest-neighbor-based estimate and
obtain a normal limit law for the estimate when X has an absolutely
continuous distribution, without any condition on the density. We also
compute the asymptotic variance explicitly and derive a non-asymptotic
bound on the variance that does not depend on the dimension d. The
asymptotic variance does not depend on the smoothness of the density of
X or of the regression function. A non-asymptotic exponential
concentration inequality is also proved. We illustrate the use of the
new estimate through testing whether a component of the vector X carries
information for predicting Y.
@article{WOS:000438839900052,
abstract = {We study the problem of estimating the smallest achievable mean-squared
error in regression function estimation. The problem is equivalent to
estimating the second moment of the regression function of Y on X is an
element of R-d . We introduce a nearest-neighbor-based estimate and
obtain a normal limit law for the estimate when X has an absolutely
continuous distribution, without any condition on the density. We also
compute the asymptotic variance explicitly and derive a non-asymptotic
bound on the variance that does not depend on the dimension d. The
asymptotic variance does not depend on the smoothness of the density of
X or of the regression function. A non-asymptotic exponential
concentration inequality is also proved. We illustrate the use of the
new estimate through testing whether a component of the vector X carries
information for predicting Y.},
added-at = {2021-09-13T10:24:34.000+0200},
address = {3163 SOMERSET DR, CLEVELAND, OH 44122 USA},
affiliation = {Devroye, L (Corresponding Author), McGill Univ, Sch Comp Sci, 3480 Univ St, Montreal, PQ H3A 0E9, Canada.
Devroye, Luc, McGill Univ, Sch Comp Sci, 3480 Univ St, Montreal, PQ H3A 0E9, Canada.
Gyorfi, Laszlo, Budapest Univ Technol \& Econ, Dept Comp Sci \& Informat Theory, Magyar Tudosok Krt 2, H-1117 Budapest, Hungary.
Lugosi, Gabor, Pompeu Fabra Univ, Dept Econ \& Business, Pg Llus Co 23, Barcelona 08010, Spain.
Lugosi, Gabor, ICREA, Pg Llus Co 23, Barcelona 08010, Spain.
Lugosi, Gabor, Barcelona Grad Sch Econ, Barcelona, Spain.
Walk, Harro, Univ Stuttgart, Inst Stochast \& Anwendungen, Pfaffenwaldring 57, D-70569 Stuttgart, Germany.},
author = {Devroye, Luc and Gyorfi, Laszlo and Lugosi, Gabor and Walk, Harro},
author-email = {lucdevroye@gmail.com
gyorfi@cs.bme.hu
gabor.lugosi@upf.edu
harro.walk@t-online.de},
biburl = {https://puma.ub.uni-stuttgart.de/bibtex/21d787ec0e700a243310945a760cebdc5/brittalenz},
da = {2021-08-10},
doc-delivery-number = {GN2PL},
doi = {10.1214/18-EJS1438},
funding-acknowledgement = {Natural Sciences and Engineering Research Council (NSERC) of
CanadaNatural Sciences and Engineering Research Council of Canada
(NSERC); National University of Public Service
{[}KOFOP-2.1.2-VEKOP-15-2016-00001]; Spanish Ministry of Economy and
Competitiveness {[}MTM2015-67304-P]; FEDER, EU},
funding-text = {Luc Devroye was supported by the Natural Sciences and Engineering
Research Council (NSERC) of Canada.; Laszlo Gyorfi was supported by the
National University of Public Service under the priority project
KOFOP-2.1.2-VEKOP-15-2016-00001 titled ``Public Service Development
Establishing Good Governance{''} in the Ludovika Workshop.; Gabor Lugosi
was supported by the Spanish Ministry of Economy and Competitiveness,
Grant MTM2015-67304-P and FEDER, EU.},
interhash = {34d1a9a1afda5e585125fae9e15c7d49},
intrahash = {1d787ec0e700a243310945a760cebdc5},
issn = {1935-7524},
journal = {ELECTRONIC JOURNAL OF STATISTICS},
journal-iso = {Electron. J. Stat.},
keywords = {imported},
language = {English},
number = 1,
number-of-cited-references = {21},
oa = {gold, Green Published, Green Submitted},
pages = {1752-1778},
publisher = {INST MATHEMATICAL STATISTICS},
research-areas = {Mathematics},
times-cited = {3},
timestamp = {2021-09-13T08:26:47.000+0200},
title = {A nearest neighbor estimate of the residual variance},
type = {Article},
unique-id = {WOS:000438839900052},
usage-count-last-180-days = {0},
usage-count-since-2013 = {0},
volume = 12,
web-of-science-categories = {Statistics \& Probability},
year = 2018
}