% TMLR article, also on arXiv as 2408.14153 (cs.CV); entry exported from PUMA.
% The acronym in the title is brace-protected so sentence-casing styles keep it uppercase.
% NOTE(review): keywords still carry "preprint unpublished" although journal and year
% describe a published version - confirm and retag upstream if appropriate.
@article{moeller2024explainingvisionlanguagesimilaritiesdual,
  author        = {Möller, Lucas and Tilli, Pascal and Vu, Ngoc Thang and Padó, Sebastian},
  title         = {Explaining Caption-Image Interactions in {CLIP} models with Second-Order Attributions},
  journal       = {Transactions on Machine Learning Research},
  year          = {2025},
  url           = {https://openreview.net/forum?id=HUUL19U7HP},
  eprint        = {2408.14153},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
  keywords      = {myown preprint unpublished},
  biburl        = {https://puma.ub.uni-stuttgart.de/bibtex/267bce8a756b3f4caadaf1ff78f092c74/sp},
  interhash     = {7f8c73539476e5a2e564e85387bb6151},
  intrahash     = {67bce8a756b3f4caadaf1ff78f092c74},
  added-at      = {2024-09-23T16:14:14.000+0200},
  timestamp     = {2025-07-29T13:00:41.000+0200},
}