Social groups are central to political discussions. However, detecting social groups in text often relies on pre-determined socio-demographic categories or supervised learning methods that require extensive hand-labeled datasets. In this paper, we propose a methodology designed to leverage the potential of Large Language Models (LLMs) for the identification and annotation of social groups in text. The experiments show that open LLMs like Llama-2-70B-Chat and Mixtral-8x7B can reliably be used to annotate social groups in a few-shot scenario without the need for supervised learning. The automatically obtained annotations largely match human annotations on random samples from the Reddit Politosphere, resulting in micro-F1 scores of 0.71 and 0.83, respectively.
%0 Conference Paper
%1 10.1145/3630744.3658412
%A Jalali Farahani, Farane
%A Hanke, Sara
%A Dima, Corina
%A Heiberger, Raphael Heiko
%A Staab, Steffen
%B Companion Publication of the 16th ACM Web Science Conference
%C New York, NY, USA
%D 2024
%I Association for Computing Machinery
%K ac-iris3d
%P 24–25
%R 10.1145/3630744.3658412
%T Who is targeted? Detecting social group mentions in online political discussions
%U https://doi.org/10.1145/3630744.3658412
%X Social groups are central to political discussions. However, detecting social groups in text often relies on pre-determined socio-demographic categories or supervised learning methods that require extensive hand-labeled datasets. In this paper, we propose a methodology designed to leverage the potential of Large Language Models (LLMs) for the identification and annotation of social groups in text. The experiments show that open LLMs like Llama-2-70B-Chat and Mixtral-8x7B can reliably be used to annotate social groups in a few-shot scenario without the need for supervised learning. The automatically obtained annotations largely match human annotations on random samples from the Reddit Politosphere, resulting in micro-F1 scores of 0.71 and 0.83, respectively.
%@ 9798400704536
@comment{Conference paper record; presumably exported from the PUMA instance named in biburl. NOTE(review): address appears to hold the publisher's city and location the conference venue (ACM export convention). Under biblatex, address is an alias of location, so confirm which value the target style should print before switching backends.}
@inproceedings{10.1145/3630744.3658412,
  abstract  = {Social groups are central to political discussions. However, detecting social groups in text often relies on pre-determined socio-demographic categories or supervised learning methods that require extensive hand-labeled datasets. In this paper, we propose a methodology designed to leverage the potential of Large Language Models (LLMs) for the identification and annotation of social groups in text. The experiments show that open LLMs like Llama-2-70B-Chat and Mixtral-8x7B can reliably be used to annotate social groups in a few-shot scenario without the need for supervised learning. The automatically obtained annotations largely match human annotations on random samples from the Reddit Politosphere, resulting in micro-F1 scores of 0.71 and 0.83, respectively.},
  added-at  = {2024-11-11T10:34:01.000+0100},
  address   = {New York, NY, USA},
  author    = {Jalali Farahani, Farane and Hanke, Sara and Dima, Corina and Heiberger, Raphael Heiko and Staab, Steffen},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2d22526f95d3868aecc284f5417a957c3/analyticcomp},
  booktitle = {Companion Publication of the 16th {ACM} Web Science Conference},
  doi       = {10.1145/3630744.3658412},
  interhash = {e8448ffa96e97650ed57e85e00b5e6bb},
  intrahash = {d22526f95d3868aecc284f5417a957c3},
  isbn      = {9798400704536},
  keywords  = {ac-iris3d},
  location  = {Stuttgart, Germany},
  numpages  = {2},
  pages     = {24--25},
  publisher = {Association for Computing Machinery},
  series    = {Websci Companion '24},
  timestamp = {2024-11-11T10:34:01.000+0100},
  title     = {Who is targeted? {Detecting} social group mentions in online political discussions},
  url       = {https://doi.org/10.1145/3630744.3658412},
  year      = {2024},
}