Word meaning is notoriously difficult to capture, both synchronically and diachronically. In this paper, we describe the creation of the largest resource of graded contextualized, diachronic word meaning annotation in four different languages, based on 100,000 human semantic proximity judgments. We describe in detail the multi-round incremental annotation process, the choice for a clustering algorithm to group usages into senses, and possible -- diachronic and synchronic -- uses for this dataset.
%0 Conference Paper
%1 schlechtweg-etal-2021-dwug
%A Schlechtweg, Dominik
%A Tahmasebi, Nina
%A Hengchen, Simon
%A Dubossarsky, Haim
%A McGillivray, Barbara
%B Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing
%C Online and Punta Cana, Dominican Republic
%D 2021
%I Association for Computational Linguistics
%K myown
%P 7079--7091
%R 10.18653/v1/2021.emnlp-main.567
%T DWUG: A large Resource of Diachronic Word Usage Graphs in Four Languages
%U https://aclanthology.org/2021.emnlp-main.567
%X Word meaning is notoriously difficult to capture, both synchronically and diachronically. In this paper, we describe the creation of the largest resource of graded contextualized, diachronic word meaning annotation in four different languages, based on 100,000 human semantic proximity judgments. We describe in detail the multi-round incremental annotation process, the choice for a clustering algorithm to group usages into senses, and possible -- diachronic and synchronic -- uses for this dataset.
@inproceedings{schlechtweg-etal-2021-dwug,
  author    = {Schlechtweg, Dominik and Tahmasebi, Nina and Hengchen, Simon and Dubossarsky, Haim and McGillivray, Barbara},
  title     = {{DWUG}: A large Resource of Diachronic Word Usage Graphs in Four Languages},
  booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
  publisher = {Association for Computational Linguistics},
  address   = {Online and Punta Cana, Dominican Republic},
  month     = nov,
  year      = {2021},
  pages     = {7079--7091},
  doi       = {10.18653/v1/2021.emnlp-main.567},
  url       = {https://aclanthology.org/2021.emnlp-main.567},
  abstract  = {Word meaning is notoriously difficult to capture, both synchronically and diachronically. In this paper, we describe the creation of the largest resource of graded contextualized, diachronic word meaning annotation in four different languages, based on 100,000 human semantic proximity judgments. We describe in detail the multi-round incremental annotation process, the choice for a clustering algorithm to group usages into senses, and possible {--} diachronic and synchronic {--} uses for this dataset.},
  keywords  = {myown},
  added-at  = {2022-10-11T18:38:47.000+0200},
  timestamp = {2022-10-11T16:38:47.000+0200},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2f2c5f9fdf5843f78b95841cd2a676126/dschlechtweg},
  interhash = {723b320c1f63192d65266de0bb1bf411},
  intrahash = {f2c5f9fdf5843f78b95841cd2a676126},
}