Quotation detection is the task of locating spans of quoted speech in text. The state of the art treats this problem as a sequence labeling task and employs linear-chain conditional random fields. We question the efficacy of this choice: The Markov assumption in the model prohibits it from making joint decisions about the begin, end, and internal context of a quotation. We perform an extensive analysis with two new model architectures. We find that (a), simple boundary classification combined with a greedy prediction strategy is competitive with the state of the art; (b), a semi-Markov model significantly outperforms all others, by relaxing the Markov assumption.
%0 Conference Paper
%1 scheible16:_model_archit_quotat_detec
%A Scheible, Christian
%A Klinger, Roman
%A Padó, Sebastian
%B Proceedings of ACL
%C Berlin, Germany
%D 2016
%K conference myown
%P 1736--1745
%T Model Architectures for Quotation Detection
%U http://www.aclweb.org/anthology/P/P16/P16-1164.pdf
%X Quotation detection is the task of locating spans of quoted speech in text. The state of the art treats this problem as a sequence labeling task and employs linear-chain conditional random fields. We question the efficacy of this choice: The Markov assumption in the model prohibits it from making joint decisions about the begin, end, and internal context of a quotation. We perform an extensive analysis with two new model architectures. We find that (a), simple boundary classification combined with a greedy prediction strategy is competitive with the state of the art; (b), a semi-Markov model significantly outperforms all others, by relaxing the Markov assumption.
% Conference paper record (ACL 2016, anthology ID P16-1164).
% The accented surname is written as a BibTeX "special character"
% (brace group starting with a control sequence) so that classic 8-bit
% BibTeX sorts and labels it correctly; Biber/biblatex accept it as well.
% Fields such as added-at, biburl, interhash, intrahash, keywords and
% timestamp are export metadata that standard styles ignore.
@inproceedings{scheible16:_model_archit_quotat_detec,
  abstract  = {Quotation detection is the task of locating spans of quoted speech in text. The state of the art treats this problem as a sequence labeling task and employs linear-chain conditional random fields. We question the efficacy of this choice: The Markov assumption in the model prohibits it from making joint decisions about the begin, end, and internal context of a quotation. We perform an extensive analysis with two new model architectures. We find that (a), simple boundary classification combined with a greedy prediction strategy is competitive with the state of the art; (b), a semi-Markov model significantly outperforms all others, by relaxing the Markov assumption.},
  added-at  = {2017-04-03T19:28:28.000+0200},
  address   = {Berlin, Germany},
  author    = {Scheible, Christian and Klinger, Roman and Pad{\'o}, Sebastian},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/2fbeab4234e533692e6d7e938fccff533/sp},
  booktitle = {Proceedings of ACL},
  doi       = {10.18653/v1/P16-1164},
  interhash = {c77dfb02001fe26838c9936221ace71a},
  intrahash = {fbeab4234e533692e6d7e938fccff533},
  keywords  = {conference myown},
  pages     = {1736--1745},
  timestamp = {2024-02-22T12:36:29.000+0100},
  title     = {Model Architectures for Quotation Detection},
  url       = {http://www.aclweb.org/anthology/P/P16/P16-1164.pdf},
  year      = {2016},
}