Background: Research results in artificial intelligence (AI) are criticized for not being reproducible. Objective: To quantify the state of reproducibility of empirical AI research using six reproducibility metrics measuring three different degrees of reproducibility. Hypotheses: 1) AI research is not documented well enough to reproduce the reported results. 2) Documentation practices have improved over time. Method: The literature is reviewed and a set of variables that should be documented to enable reproducibility are grouped into three factors: Experiment, Data and Method. The metrics describe how well the factors have been documented for a paper. A total of 400 research papers from the conference series IJCAI and AAAI have been surveyed using the metrics. Findings: None of the papers document all of the variables. The metrics show that between 20\% and 30\% of the variables for each factor are documented. One of the metrics show statistically significant increase over time while the others show no change. Interpretation: The reproducibility scores decrease with increased documentation requirements. Improvement over time is found. Conclusion: Both hypotheses are supported.
%0 Conference Paper
%1 GundersenKjensmo2018State
%A Gundersen, Odd Erik
%A Kjensmo, Sigbjørn
%B Proceedings of the AAAI Conference on Artificial Intelligence
%D 2018
%K Blaupause,Diss,gelesen,Literature Field,Reproducibility,zitiert Research Review,Other
%R 10.1609/aaai.v32i1.11503
%T State of the Art: Reproducibility in Artificial Intelligence
%V 32
%X Background: Research results in artificial intelligence (AI) are criticized for not being reproducible. Objective: To quantify the state of reproducibility of empirical AI research using six reproducibility metrics measuring three different degrees of reproducibility. Hypotheses: 1) AI research is not documented well enough to reproduce the reported results. 2) Documentation practices have improved over time. Method: The literature is reviewed and a set of variables that should be documented to enable reproducibility are grouped into three factors: Experiment, Data and Method. The metrics describe how well the factors have been documented for a paper. A total of 400 research papers from the conference series IJCAI and AAAI have been surveyed using the metrics. Findings: None of the papers document all of the variables. The metrics show that between 20\% and 30\% of the variables for each factor are documented. One of the metrics show statistically significant increase over time while the others show no change. Interpretation: The reproducibility scores decrease with increased documentation requirements. Improvement over time is found. Conclusion: Both hypotheses are supported.
@inproceedings{GundersenKjensmo2018State,
  abstract = {Background: Research results in artificial intelligence (AI) are criticized for not being reproducible. Objective: To quantify the state of reproducibility of empirical AI research using six reproducibility metrics measuring three different degrees of reproducibility. Hypotheses: 1) AI research is not documented well enough to reproduce the reported results. 2) Documentation practices have improved over time. Method: The literature is reviewed and a set of variables that should be documented to enable reproducibility are grouped into three factors: Experiment, Data and Method. The metrics describe how well the factors have been documented for a paper. A total of 400 research papers from the conference series IJCAI and AAAI have been surveyed using the metrics. Findings: None of the papers document all of the variables. The metrics show that between 20\% and 30\% of the variables for each factor are documented. One of the metrics show statistically significant increase over time while the others show no change. Interpretation: The reproducibility scores decrease with increased documentation requirements. Improvement over time is found. Conclusion: Both hypotheses are supported.},
  added-at = {2024-01-29T00:52:52.000+0100},
  author = {Gundersen, Odd Erik and Kjensmo, Sigbj{\o}rn},
  biburl = {https://puma.ub.uni-stuttgart.de/bibtex/2c170771e6567ed53dceb95618366cffc/hermann},
  booktitle = {Proceedings of the {AAAI} Conference on {Artificial Intelligence}},
  doi = {10.1609/aaai.v32i1.11503},
  file = {/Users/sibyllehermann/Zotero/storage/IRXVPAKF/Gundersen und Kjensmo - 2018 - State of the Art Reproducibility in Artificial In.pdf},
  interhash = {d751c2dec9822c6feb4557861102c792},
  intrahash = {c170771e6567ed53dceb95618366cffc},
  keywords = {Blaupause,Diss,gelesen,Literature Field,Reproducibility,zitiert Research Review,Other},
  month = apr,
  shorttitle = {State of the {Art}},
  timestamp = {2024-01-29T00:52:52.000+0100},
  title = {State of the {Art}: {Reproducibility} in {Artificial Intelligence}},
  urldate = {2024-01-25},
  volume = {32},
  year = {2018}
}