Medical narratives are fundamental to the correct identification of a patient’s health condition. This is not only because it describes the patient’s situation. It also contains relevant information about the patient’s context and health state evolution. Narratives are usually vague and cannot be categorized easily. On the other hand, once the patient’s situation is correctly identified based on a narrative, it is then possible to map the patient’s situation into precise classification schemas and ontologies that are machine-readable. To this end, language models can be trained to read and extract elements from these narratives. However, the main problem is the lack of data for model identification and model training in languages other than English. First, gold standard annotations are usually not available due to the high level of data protection for patient data. Second, gold standard annotations (if available) are difficult to access. Alternative available data, like MIMIC (Sci Data 3:1, 2016) is written in English and for specific patient conditions like intensive care. Thus, when model training is required for other types of patients, like oncology (and not intensive care), this could lead to bias. To facilitate clinical narrative model training, a method for creating high-quality synthetic narratives is needed.
%0 Journal Article
%1 diazochoa2024aluminum
%A Diaz Ochoa, Juan G.
%A Mustafa, Faizan E.
%A Weil, Felix
%A Wang, Yi
%A Kama, Kudret
%A Knott, Markus
%D 2024
%J BMC Medical Informatics and Decision Making
%K ki
%N 1
%P 409
%R 10.1186/s12911-024-02825-4
%T The aluminum standard: using generative Artificial Intelligence tools to synthesize and annotate non-structured patient data
%U https://doi.org/10.1186/s12911-024-02825-4
%V 24
%X Medical narratives are fundamental to the correct identification of a patient’s health condition. This is not only because it describes the patient’s situation. It also contains relevant information about the patient’s context and health state evolution. Narratives are usually vague and cannot be categorized easily. On the other hand, once the patient’s situation is correctly identified based on a narrative, it is then possible to map the patient’s situation into precise classification schemas and ontologies that are machine-readable. To this end, language models can be trained to read and extract elements from these narratives. However, the main problem is the lack of data for model identification and model training in languages other than English. First, gold standard annotations are usually not available due to the high level of data protection for patient data. Second, gold standard annotations (if available) are difficult to access. Alternative available data, like MIMIC (Sci Data 3:1, 2016) is written in English and for specific patient conditions like intensive care. Thus, when model training is required for other types of patients, like oncology (and not intensive care), this could lead to bias. To facilitate clinical narrative model training, a method for creating high-quality synthetic narratives is needed.
@comment{diazochoa2024aluminum: journal article in BMC Medical Informatics and Decision Making, vol. 24, no. 1 (2024). The pages field holds the single article identifier 409 (no page range is given by the source record). The abstract is quoted verbatim from the publisher record.}
@article{diazochoa2024aluminum,
  abstract  = {Medical narratives are fundamental to the correct identification of a patient’s health condition. This is not only because it describes the patient’s situation. It also contains relevant information about the patient’s context and health state evolution. Narratives are usually vague and cannot be categorized easily. On the other hand, once the patient’s situation is correctly identified based on a narrative, it is then possible to map the patient’s situation into precise classification schemas and ontologies that are machine-readable. To this end, language models can be trained to read and extract elements from these narratives. However, the main problem is the lack of data for model identification and model training in languages other than English. First, gold standard annotations are usually not available due to the high level of data protection for patient data. Second, gold standard annotations (if available) are difficult to access. Alternative available data, like MIMIC (Sci Data 3:1, 2016) is written in English and for specific patient conditions like intensive care. Thus, when model training is required for other types of patients, like oncology (and not intensive care), this could lead to bias. To facilitate clinical narrative model training, a method for creating high-quality synthetic narratives is needed.},
  added-at  = {2025-02-19T17:00:10.000+0100},
  author    = {Diaz Ochoa, Juan G. and Mustafa, Faizan E. and Weil, Felix and Wang, Yi and Kama, Kudret and Knott, Markus},
  biburl    = {https://puma.ub.uni-stuttgart.de/bibtex/27b4851821087f0aa95f4ba6297a9d4e6/joy},
  doi       = {10.1186/s12911-024-02825-4},
  interhash = {327deee07a985244f4944d3f7d8491f5},
  intrahash = {7b4851821087f0aa95f4ba6297a9d4e6},
  issn      = {1472-6947},
  journal   = {BMC Medical Informatics and Decision Making},
  keywords  = {ki},
  number    = {1},
  pages     = {409},
  refid     = {Diaz Ochoa2024},
  timestamp = {2025-02-19T17:00:10.000+0100},
  title     = {The aluminum standard: using generative Artificial Intelligence tools to synthesize and annotate non-structured patient data},
  url       = {https://doi.org/10.1186/s12911-024-02825-4},
  volume    = {24},
  year      = {2024},
}