Generative modelling aims to accelerate the discovery of novel chemicals by directly proposing
structures with desirable properties. Recently, score-based, or diffusion, generative models have
significantly outperformed previous approaches. Key to their success is the close relationship between
the score and physical force, allowing the use of powerful equivariant neural networks. However, the
behaviour of the learnt score is not yet well understood. Here, we analyse the score by training an
energy-based diffusion model for molecular generation. We find that during the generation the score
resembles a restorative potential initially and a quantum-mechanical force at the end. In between
the two endpoints, it exhibits special properties that enable the building of large molecules. Using
insights from the trained model, we present Similarity-based Molecular Generation (SiMGen), a new
method for zero shot molecular generation. SiMGen combines a time-dependent similarity kernel
with descriptors from a pretrained machine learning force field to generate molecules without any
further training. Our approach allows full control over the molecular shape through point cloud
priors and supports conditional generation. We also release an interactive web tool that allows users
to generate structures with SiMGen online (https://zndraw.icp.uni-stuttgart.de).
Description
[2402.08708] Zero Shot Molecular Generation via Similarity Kernels
%0 Generic
%1 elijosius24a
%A Elijošius, Rokas
%A Zills, Fabian
%A Batatia, Ilyes
%A Norwood, Sam Walton
%A Kovács, Dávid Péter
%A Holm, Christian
%A Csányi, Gábor
%D 2024
%K icp preprint
%R 10.48550/arXiv.2402.08708
%T Zero Shot Molecular Generation via Similarity Kernels
%X Generative modelling aims to accelerate the discovery of novel chemicals by directly proposing
structures with desirable properties. Recently, score-based, or diffusion, generative models have
significantly outperformed previous approaches. Key to their success is the close relationship between
the score and physical force, allowing the use of powerful equivariant neural networks. However, the
behaviour of the learnt score is not yet well understood. Here, we analyse the score by training an
energy-based diffusion model for molecular generation. We find that during the generation the score
resembles a restorative potential initially and a quantum-mechanical force at the end. In between
the two endpoints, it exhibits special properties that enable the building of large molecules. Using
insights from the trained model, we present Similarity-based Molecular Generation (SiMGen), a new
method for zero shot molecular generation. SiMGen combines a time-dependent similarity kernel
with descriptors from a pretrained machine learning force field to generate molecules without any
further training. Our approach allows full control over the molecular shape through point cloud
priors and supports conditional generation. We also release an interactive web tool that allows users
to generate structures with SiMGen online (https://zndraw.icp.uni-stuttgart.de).
% arXiv preprint 2402.08708 (physics.chem-ph). Fields are grouped as: bibliographic core,
% identifiers, bookkeeping fields carried over from the record at biburl, then the abstract.
@misc{elijosius24a,
  author        = {Elijošius, Rokas and Zills, Fabian and Batatia, Ilyes and Norwood, Sam Walton and Kovács, Dávid Péter and Holm, Christian and Csányi, Gábor},
  title         = {Zero Shot Molecular Generation via Similarity Kernels},
  year          = {2024},
  month         = feb,
  eprint        = {2402.08708},
  archiveprefix = {arXiv},
  primaryclass  = {physics.chem-ph},
  doi           = {10.48550/arXiv.2402.08708},
  keywords      = {icp preprint},
  description   = {[2402.08708] Zero Shot Molecular Generation via Similarity Kernels},
  biburl        = {https://puma.ub.uni-stuttgart.de/bibtex/2a36ffb4d073542470967688db997304a/mzecevic},
  interhash     = {9f917b4a269e4d100d0406f9905afb0b},
  intrahash     = {a36ffb4d073542470967688db997304a},
  added-at      = {2024-03-27T09:43:01.000+0100},
  timestamp     = {2024-03-27T09:52:01.000+0100},
  abstract      = {Generative modelling aims to accelerate the discovery of novel chemicals by directly proposing
structures with desirable properties. Recently, score-based, or diffusion, generative models have
significantly outperformed previous approaches. Key to their success is the close relationship between
the score and physical force, allowing the use of powerful equivariant neural networks. However, the
behaviour of the learnt score is not yet well understood. Here, we analyse the score by training an
energy-based diffusion model for molecular generation. We find that during the generation the score
resembles a restorative potential initially and a quantum-mechanical force at the end. In between
the two endpoints, it exhibits special properties that enable the building of large molecules. Using
insights from the trained model, we present Similarity-based Molecular Generation (SiMGen), a new
method for zero shot molecular generation. SiMGen combines a time-dependent similarity kernel
with descriptors from a pretrained machine learning force field to generate molecules without any
further training. Our approach allows full control over the molecular shape through point cloud
priors and supports conditional generation. We also release an interactive web tool that allows users
to generate structures with SiMGen online (https://zndraw.icp.uni-stuttgart.de).},
}