Even though fine-tuned neural language models have been pivotal in enabling “deep” automatic text analysis, optimizing text representations for specific applications remains a crucial bottleneck. In this study, we look at this problem in the context of a task from computational social science, namely modeling pairwise similarities between political parties. Our research question is what level of structural information is necessary to create robust text representation, contrasting a strongly informed approach (which uses both claim span and claim category annotations) with approaches that forgo one or both types of annotation with document structure-based heuristics. Evaluating our models on the manifestos of German parties for the 2021 federal election, we find that heuristics that maximize within-party over between-party similarity along with a normalization step lead to reliable party similarity prediction, without the need for manual annotation.
%0 Conference Paper
%1 ceron22:_optim
%A Ceron, Tanise
%A Blokker, Nico
%A Padó, Sebastian
%B Proceedings of CoNLL
%C Abu Dhabi, UAE
%D 2022
%K conference myown
%P 325--338
%T Optimizing text representations to capture (dis)similarity between political parties
%U https://aclanthology.org/2022.conll-1.22
%X Even though fine-tuned neural language models have been pivotal in enabling “deep” automatic text analysis, optimizing text representations for specific applications remains a crucial bottleneck. In this study, we look at this problem in the context of a task from computational social science, namely modeling pairwise similarities between political parties. Our research question is what level of structural information is necessary to create robust text representation, contrasting a strongly informed approach (which uses both claim span and claim category annotations) with approaches that forgo one or both types of annotation with document structure-based heuristics. Evaluating our models on the manifestos of German parties for the 2021 federal election, we find that heuristics that maximize within-party over between-party similarity along with a normalization step lead to reliable party similarity prediction, without the need for manual annotation.
@comment{Conference paper by Ceron et al. in the CoNLL proceedings (2022); the source record is the one given in biburl. Accented letters and quotation marks are written as LaTeX escapes so the entry works under classic 8-bit BibTeX as well as Biber. Fields below keywords are bookkeeping metadata that bibliography styles ignore.}
@inproceedings{ceron22:_optim,
  author    = {Ceron, Tanise and Blokker, Nico and Pad{\'o}, Sebastian},
  title     = {Optimizing text representations to capture (dis)similarity between political parties},
  booktitle = {Proceedings of {CoNLL}},
  address   = {Abu Dhabi, UAE},
  pages     = {325--338},
  year      = {2022},
  url       = {https://aclanthology.org/2022.conll-1.22},
  abstract  = {Even though fine-tuned neural language models have been pivotal in enabling ``deep'' automatic text analysis, optimizing text representations for specific applications remains a crucial bottleneck. In this study, we look at this problem in the context of a task from computational social science, namely modeling pairwise similarities between political parties. Our research question is what level of structural information is necessary to create robust text representation, contrasting a strongly informed approach (which uses both claim span and claim category annotations) with approaches that forgo one or both types of annotation with document structure-based heuristics. Evaluating our models on the manifestos of German parties for the 2021 federal election, we find that heuristics that maximize within-party over between-party similarity along with a normalization step lead to reliable party similarity prediction, without the need for manual annotation.},
  keywords  = {conference myown},
  added-at  = {2022-09-21T21:23:59.000+0200},
  timestamp = {2024-02-22T12:31:50.000+0100},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2273bfc444ede51e1993243509ab2d1ae/sp},
  interhash = {c75df9db5c18eef841a635dfc0cf6e7e},
  intrahash = {273bfc444ede51e1993243509ab2d1ae},
}