We study the performance of different pool-based Batch Mode Deep Active Learning (BMDAL) methods for regression on tabular data, focusing on methods that do not require to modify the network architecture and training. Our contributions are three-fold: First, we present a framework for constructing BMDAL methods out of kernels, kernel transformations and selection methods, showing that many of the most popular BMDAL methods fit into our framework. Second, we propose new components, leading to a new BMDAL method. Third, we introduce an open-source benchmark with 15 large tabular data sets, which we use to compare different BMDAL methods. Our benchmark results show that a combination of our novel components yields new state-of-the-art results in terms of RMSE and is computationally efficient. We provide open-source code that includes efficient implementations of all kernels, kernel transformations, and selection methods, and can be used for reproducing our results.
%0 Journal Article
%1 holzmueller_framework_2022
%A Holzmüller, David
%A Zaverkin, Viktor
%A Kästner, Johannes
%A Steinwart, Ingo
%D 2022
%J arXiv:2203.09410
%K exc2075 graduateschool myown pn6 preprint
%T A Framework and Benchmark for Deep Batch Active Learning for Regression
%X We study the performance of different pool-based Batch Mode Deep Active Learning (BMDAL) methods for regression on tabular data, focusing on methods that do not require to modify the network architecture and training. Our contributions are three-fold: First, we present a framework for constructing BMDAL methods out of kernels, kernel transformations and selection methods, showing that many of the most popular BMDAL methods fit into our framework. Second, we propose new components, leading to a new BMDAL method. Third, we introduce an open-source benchmark with 15 large tabular data sets, which we use to compare different BMDAL methods. Our benchmark results show that a combination of our novel components yields new state-of-the-art results in terms of RMSE and is computationally efficient. We provide open-source code that includes efficient implementations of all kernels, kernel transformations, and selection methods, and can be used for reproducing our results.
@comment{
  holzmueller_framework_2022 -- arXiv preprint (no journal given).
  The arXiv identifier is stored in eprint/archiveprefix (plus a url built
  from the same identifier) rather than in the journal field, so styles do
  not typeset it as a journal name. The entry type is misc because an
  article entry without a journal is missing a required field.
  Accented letters in author names use BibTeX special-character escapes so
  sorting and labels also work under classic 8-bit BibTeX.
  NOTE(review): added-at, biburl, interhash, intrahash and timestamp look
  like bookkeeping fields of the PUMA export (see biburl); standard styles
  ignore unknown fields. Confirm before removing them.
  NOTE(review): primaryclass/eprintclass is not given in the export; add it
  once verified against the arXiv listing.
}
@misc{holzmueller_framework_2022,
  author        = {Holzm{\"u}ller, David and Zaverkin, Viktor and K{\"a}stner, Johannes and Steinwart, Ingo},
  title         = {A Framework and Benchmark for Deep Batch Active Learning for Regression},
  year          = {2022},
  eprint        = {2203.09410},
  archiveprefix = {arXiv},
  url           = {https://arxiv.org/abs/2203.09410},
  keywords      = {exc2075 graduateschool myown pn6 preprint},
  abstract      = {We study the performance of different pool-based Batch Mode Deep Active Learning (BMDAL) methods for regression on tabular data, focusing on methods that do not require to modify the network architecture and training. Our contributions are three-fold: First, we present a framework for constructing BMDAL methods out of kernels, kernel transformations and selection methods, showing that many of the most popular BMDAL methods fit into our framework. Second, we propose new components, leading to a new BMDAL method. Third, we introduce an open-source benchmark with 15 large tabular data sets, which we use to compare different BMDAL methods. Our benchmark results show that a combination of our novel components yields new state-of-the-art results in terms of RMSE and is computationally efficient. We provide open-source code that includes efficient implementations of all kernels, kernel transformations, and selection methods, and can be used for reproducing our results.},
  added-at      = {2022-03-21T12:58:54.000+0100},
  timestamp     = {2022-08-16T09:16:24.000+0200},
  biburl        = {https://puma.ub.uni-stuttgart.de/bibtex/2c60d6fbed2281c09769e637cf4bb7b9d/davidholzmller},
  interhash     = {27fa5d1aa438de56c68c52cc8ccc12d7},
  intrahash     = {c60d6fbed2281c09769e637cf4bb7b9d},
}