Large-language models (LLMs) have the potential to support a wide range of applications like conversational agents, creative writing, text improvement, and general query answering. However, they are ill-suited for query answering in high-stake domains like medicine because they generate answers at random and their answers are typically not robust - even the same query can result in different answers when prompted multiple times. In order to improve the robustness of LLM queries, we propose using ranking queries repeatedly and to aggregate the queries using methods from social choice theory. We study ranking queries in diagnostic settings like medical and fault diagnosis and discuss how the Partial Borda Choice function from the literature can be applied to merge multiple query results. We discuss some additional interesting properties in our setting and evaluate the robustness of our approach empirically.
%0 Conference Paper
%1 potyka2024robust
%A Potyka, Nico
%A Zhu, Yuqicheng
%A He, Yunjie
%A Kharlamov, Evgeny
%A Staab, Steffen
%B Proceedings of the 23rd International Conference on Autonomous Agents and Multi-Agent Systems
%D 2024
%K LLM Robustness myown
%T Robust Knowledge Extraction from Large Language Models using Social Choice Theory
%U https://arxiv.org/abs/2312.14877
%X Large-language models (LLMs) have the potential to support a wide range of applications like conversational agents, creative writing, text improvement, and general query answering. However, they are ill-suited for query answering in high-stake domains like medicine because they generate answers at random and their answers are typically not robust - even the same query can result in different answers when prompted multiple times. In order to improve the robustness of LLM queries, we propose using ranking queries repeatedly and to aggregate the queries using methods from social choice theory. We study ranking queries in diagnostic settings like medical and fault diagnosis and discuss how the Partial Borda Choice function from the literature can be applied to merge multiple query results. We discuss some additional interesting properties in our setting and evaluate the robustness of our approach empirically.
@inproceedings{potyka2024robust,
  abstract      = {Large-language models (LLMs) have the potential to support a wide range of applications like conversational agents, creative writing, text improvement, and general query answering. However, they are ill-suited for query answering in high-stake domains like medicine because they generate answers at random and their answers are typically not robust - even the same query can result in different answers when prompted multiple times. In order to improve the robustness of LLM queries, we propose using ranking queries repeatedly and to aggregate the queries using methods from social choice theory. We study ranking queries in diagnostic settings like medical and fault diagnosis and discuss how the Partial Borda Choice function from the literature can be applied to merge multiple query results. We discuss some additional interesting properties in our setting and evaluate the robustness of our approach empirically.},
  added-at      = {2023-12-28T14:51:44.000+0100},
  author        = {Potyka, Nico and Zhu, Yuqicheng and He, Yunjie and Kharlamov, Evgeny and Staab, Steffen},
  biburl        = {https://puma.ub.uni-stuttgart.de/bibtex/2f228d993820c433e347c97bed74f346f/yuqichengzhu},
  booktitle     = {Proceedings of the 23rd International Conference on Autonomous Agents and Multi-Agent Systems},
  description   = {accepted as a full paper by AAMAS'24},
  eprint        = {2312.14877},
  archiveprefix = {arXiv},
  interhash     = {43343a3d35159a789cb64366457993b2},
  intrahash     = {f228d993820c433e347c97bed74f346f},
  keywords      = {LLM Robustness myown},
  timestamp     = {2024-03-08T17:00:53.000+0100},
  title         = {Robust Knowledge Extraction from Large Language Models using Social Choice Theory},
  url           = {https://arxiv.org/abs/2312.14877},
  year          = {2024},
}