Hierarchical concepts have proven useful in many classical and learning-based optical flow methods regarding both accuracy and robustness. In this paper we show that such concepts are still useful in the context of recent neural networks that follow RAFT’s paradigm refraining from hierarchical strategies by relying on recurrent updates based on a single-scale all-pairs transform. To this end, we introduce MS-RAFT+: a novel recurrent multi-scale architecture based on RAFT that unifies several successful hierarchical concepts. It employs a coarse-to-fine estimation to enable the use of finer resolutions by useful initializations from coarser scales. Moreover, it relies on RAFT’s correlation pyramid that allows to consider non-local cost information during the matching process. Furthermore, it makes use of advanced multi-scale features that incorporate high-level information from coarser scales. And finally, our method is trained subject to a sample-wise robust multi-scale multi-iteration loss that closely supervises each iteration on each scale, while allowing to discard particularly difficult samples. In combination with an appropriate mixed-dataset training strategy, our method performs favorably. It not only yields highly accurate results on the four major benchmarks (KITTI 2015, MPI Sintel, Middlebury and VIPER), it also allows to achieve these results with a single model and a single parameter setting. Our trained model and code are available at https://github.com/cv-stuttgart/MS_RAFT_plus.
%0 Journal Article
%1 jahedi2023msraft
%A Jahedi, Azin
%A Luz, Maximilian
%A Rivinius, Marc
%A Mehl, Lukas
%A Bruhn, Andrés
%D 2023
%I Springer
%J International Journal of Computer Vision
%K sfbtrr161 b04 2023
%P 1573-1405
%R 10.1007/s11263-023-01930-7
%T MS-RAFT+: High Resolution Multi-Scale RAFT
%U https://doi.org/10.1007/s11263-023-01930-7
%X Hierarchical concepts have proven useful in many classical and learning-based optical flow methods regarding both accuracy and robustness. In this paper we show that such concepts are still useful in the context of recent neural networks that follow RAFT’s paradigm refraining from hierarchical strategies by relying on recurrent updates based on a single-scale all-pairs transform. To this end, we introduce MS-RAFT+: a novel recurrent multi-scale architecture based on RAFT that unifies several successful hierarchical concepts. It employs a coarse-to-fine estimation to enable the use of finer resolutions by useful initializations from coarser scales. Moreover, it relies on RAFT’s correlation pyramid that allows to consider non-local cost information during the matching process. Furthermore, it makes use of advanced multi-scale features that incorporate high-level information from coarser scales. And finally, our method is trained subject to a sample-wise robust multi-scale multi-iteration loss that closely supervises each iteration on each scale, while allowing to discard particularly difficult samples. In combination with an appropriate mixed-dataset training strategy, our method performs favorably. It not only yields highly accurate results on the four major benchmarks (KITTI 2015, MPI Sintel, Middlebury and VIPER), it also allows to achieve these results with a single model and a single parameter setting. Our trained model and code are available at https://github.com/cv-stuttgart/MS_RAFT_plus.
@article{jahedi2023msraft,
  abstract       = {Hierarchical concepts have proven useful in many classical and learning-based optical flow methods regarding both accuracy and robustness. In this paper we show that such concepts are still useful in the context of recent neural networks that follow RAFT’s paradigm refraining from hierarchical strategies by relying on recurrent updates based on a single-scale all-pairs transform. To this end, we introduce MS-RAFT+: a novel recurrent multi-scale architecture based on RAFT that unifies several successful hierarchical concepts. It employs a coarse-to-fine estimation to enable the use of finer resolutions by useful initializations from coarser scales. Moreover, it relies on RAFT’s correlation pyramid that allows to consider non-local cost information during the matching process. Furthermore, it makes use of advanced multi-scale features that incorporate high-level information from coarser scales. And finally, our method is trained subject to a sample-wise robust multi-scale multi-iteration loss that closely supervises each iteration on each scale, while allowing to discard particularly difficult samples. In combination with an appropriate mixed-dataset training strategy, our method performs favorably. It not only yields highly accurate results on the four major benchmarks (KITTI 2015, MPI Sintel, Middlebury and VIPER), it also allows to achieve these results with a single model and a single parameter setting. Our trained model and code are available at https://github.com/cv-stuttgart/MS_RAFT_plus.},
  added-at       = {2024-04-12T11:29:04.000+0200},
  affiliation    = {Jahedi, A; Luz, M (Corresponding Author), Univ Stuttgart, Inst Visualizat & Interact Syst, Stuttgart, Germany. Luz, M (Corresponding Author), Univ Freiburg, Robot Learning Lab, Freiburg, Germany. Jahedi, Azin; Luz, Maximilian; Mehl, Lukas; Bruhn, Andres, Univ Stuttgart, Inst Visualizat & Interact Syst, Stuttgart, Germany. Luz, Maximilian, Univ Freiburg, Robot Learning Lab, Freiburg, Germany. Rivinius, Marc, Univ Stuttgart, Inst Informat Secur, Stuttgart, Germany.},
  author         = {Jahedi, Azin and Luz, Maximilian and Rivinius, Marc and Mehl, Lukas and Bruhn, Andrés},
  biburl         = {https://puma.ub.uni-stuttgart.de/bibtex/222e991e9ac2eb1409536c7dc30b6abd3/sfbtrr161},
  doi            = {10.1007/s11263-023-01930-7},
  interhash      = {febb53cee99d4c2b391ac4dbe83309cc},
  intrahash      = {22e991e9ac2eb1409536c7dc30b6abd3},
  issn           = {0920-5691, 1573-1405},
  journal        = {International Journal of Computer Vision},
  keywords       = {sfbtrr161 b04 2023},
  publisher      = {Springer},
  pubstate       = {prepublished},
  research-areas = {Computer Science},
  timestamp      = {2024-04-12T11:29:04.000+0200},
  title          = {{MS-RAFT+}: High Resolution Multi-Scale {RAFT}},
  unique-id      = {WOS:001126025000002},
  url            = {https://doi.org/10.1007/s11263-023-01930-7},
  internal-note  = {pages field removed during review: exported value 1573-1405 duplicated the electronic ISSN, not a page range; entry is online-first (see pubstate) -- TODO add real pages/volume once assigned},
  year           = {2023}
}