<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.0 20120330//EN" "JATS-journalpublishing1.dtd">
<article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">INFORMATICA</journal-id>
<journal-title-group><journal-title>Informatica</journal-title></journal-title-group>
<issn pub-type="epub">1822-8844</issn><issn pub-type="ppub">0868-4952</issn><issn-l>0868-4952</issn-l>
<publisher>
<publisher-name>Vilnius University</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">INFOR535</article-id>
<article-id pub-id-type="doi">10.15388/23-INFOR535</article-id>
<article-categories><subj-group subj-group-type="heading">
<subject>Research Article</subject></subj-group></article-categories>
<title-group>
<article-title>An Effective Solution for Drug Discovery Based on the Tangram Meta-Heuristic and Compound Filtering</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Cruz</surname><given-names>Nicolás C.</given-names></name><email xlink:href="ncalvocruz@ugr.es">ncalvocruz@ugr.es</email><xref ref-type="aff" rid="j_infor535_aff_001">1</xref><xref ref-type="corresp" rid="cor1">∗</xref><bio>
<p><bold>N.C. Cruz</bold> is a post-doctoral researcher at the Department of Computer Engineering, Automation, and Robotics of the University of Granada, Spain. After studying for a bachelor’s and master’s degree in computer engineering, he obtained his PhD in computer science at the University of Almería, Spain, in 2019. He is a member of the Supercomputing-Algorithms Research Group at that institution. His research focuses on numerical optimization through meta-heuristics and high-performance computing applied to different problems, such as design and control of solar power tower plants, neural model tuning, and optimization of mechanisms.</p></bio>
</contrib>
<contrib contrib-type="author">
<name><surname>Puertas-Martín</surname><given-names>Savíns</given-names></name><email xlink:href="savinspm@ual.es">savinspm@ual.es</email><xref ref-type="aff" rid="j_infor535_aff_002">2</xref><xref ref-type="aff" rid="j_infor535_aff_003">3</xref><bio>
<p><bold>S. Puertas-Martín</bold> is a post-doctoral researcher at the Department of Informatics of the University of Almería, Spain. He is also doing a research stay at the Information School of the University of Sheffield in the United Kingdom. He obtained his PhD in computer science at the University of Almería in 2020. He is a member of the Supercomputing-Algorithms Research Group at that institution. His research interests are drug discovery, global optimization and high-performance computing.</p></bio>
</contrib>
<contrib contrib-type="author">
<name><surname>Redondo</surname><given-names>Juana L.</given-names></name><email xlink:href="jlredondo@ual.es">jlredondo@ual.es</email><xref ref-type="aff" rid="j_infor535_aff_002">2</xref><bio>
<p><bold>J.L. Redondo</bold> is a full professor at the Department of Informatics of the University of Almería, Spain. She obtained her PhD in computer science from the University of Almería in 2008. She is a member of the Supercomputing-Algorithms Research Group at that institution. Her research interests include high-performance computing, global optimization and applications.</p></bio>
</contrib>
<contrib contrib-type="author">
<name><surname>Ortigosa</surname><given-names>Pilar M.</given-names></name><email xlink:href="ortigosa@ual.es">ortigosa@ual.es</email><xref ref-type="aff" rid="j_infor535_aff_002">2</xref><bio>
<p><bold>P.M. Ortigosa</bold> is a full professor of architecture and computer technology at the University of Almería, Spain. She received MSc degrees in physics and electronic engineering from the University of Granada in 1994 and 1996, respectively, and a PhD in computer science from the University of Málaga in 1999. She is a member of the Supercomputing-Algorithms Research Group at the University of Almería. Her research focuses on high-performance computing, metaheuristic global optimization, computational intelligence, deep learning, and their application to several real problems. Recently she has been working on the Internet of Things.</p></bio>
</contrib>
<aff id="j_infor535_aff_001"><label>1</label>Department of Computer Engineering, Automation and Robotics, <institution>University of Granada</institution>, <country>Spain</country></aff>
<aff id="j_infor535_aff_002"><label>2</label>Department of Informatics, <institution>University of Almería, ceiA3 campus</institution>, <country>Spain</country></aff>
<aff id="j_infor535_aff_003"><label>3</label>Information School, <institution>University of Sheffield</institution>, <country>United Kingdom</country></aff>
</contrib-group>
<author-notes>
<corresp id="cor1"><label>∗</label>Corresponding author.</corresp>
</author-notes>
<pub-date pub-type="ppub"><year>2023</year></pub-date><pub-date pub-type="epub"><day>7</day><month>11</month><year>2023</year></pub-date><volume>34</volume><issue>4</issue><fpage>743</fpage><lpage>769</lpage><history><date date-type="received"><month>5</month><year>2023</year></date><date date-type="accepted"><month>10</month><year>2023</year></date></history>
<permissions><copyright-statement>© 2023 Vilnius University</copyright-statement><copyright-year>2023</copyright-year>
<license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>Open access article under the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/">CC BY</ext-link> license.</license-p></license></permissions>
<abstract>
<p>Ligand-Based Virtual Screening accelerates and cheapens the design of new drugs. However, it needs efficient optimizers because of the size of compound databases. This work proposes a new method called Tangram CW. The proposal also includes a knowledge-based filter of compounds. Tangram CW achieves comparable results to the state-of-the-art tools OptiPharm and 2L-GO-Pharm using about a tenth of their computational budget without filtering. Activating the filter discards more than two thirds of the database while keeping the desired compounds. Thus, it is possible to consider molecular flexibility despite increasing the options. The implemented software package is public.</p>
</abstract>
<kwd-group>
<label>Key words</label>
<kwd>virtual screening</kwd>
<kwd>shape similarity</kwd>
<kwd>meta-heuristic</kwd>
<kwd>knowledge-based filtering</kwd>
<kwd>parallel computing</kwd>
</kwd-group>
<funding-group><funding-statement>This work has been supported by Grant PID2021-123278OB-I00 funded by MCIN/AEI/10.13039/501100011033 and by “ERDF A way of making Europe”; and by projects PDC2022-133370-I00 and TED2021-132020B-I00 funded by MCIN/AEI/10.13039/501100011033 and by European Union NextGenerationEU/PRTR. N.C. Cruz is supported by the Ministry of Economic Transformation, Industry, Knowledge and Universities from the Andalusian government (PAIDI 2021: POSTDOC_21_00124). Savíns Puertas Martín is a fellow of the “Margarita Salas” grant (RR_A_2021_21), financed by the European Union (NextGenerationEU).</funding-statement></funding-group>
</article-meta>
</front>
<body>
<sec id="j_infor535_s_001">
<label>1</label>
<title>Introduction</title>
<sec id="j_infor535_s_002">
<label>1.1</label>
<title>Overview</title>
<p>The drug discovery process is a major challenge in the real-world scenario of today, where different factors play a role (Hughes <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_018">2011</xref>). This implies that developing new drugs costs, on average, more than 1 billion USD and can take between 12 and 15 years at all stages (Sumudu and Leelananda, <xref ref-type="bibr" rid="j_infor535_ref_041">2016</xref>; Ban <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_002">2017</xref>). To speed up this process and reduce costs, there is a continuous process of designing and implementing new techniques from traditional medicine (Fu <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_013">2017</xref>) to High Throughput Screening (HTS) infrastructures (Zeng <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_048">2020</xref>).</p>
<p>In this context, Virtual Screening (VS) is a relevant in silico technique in drug discovery that can help identify potential drug candidates with high efficacy and safety profiles (McInnes, <xref ref-type="bibr" rid="j_infor535_ref_027">2007</xref>). In fact, VS has helped bring to market compounds such as ritonavir, nelfinavir, saquinavir (Kanhed <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_021">2021</xref>) or plasmepsin inhibitors (Meissner <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_028">2019</xref>). There are two types of VS methods depending on the information obtained from compounds: Structure-Based VS (SBVS) and Ligand-Based VS (LBVS). SBVS methods (Maia <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_025">2020</xref>) require knowledge of the structure of the target protein, which to obtain involves a set of challenges (Parois <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_029">2015</xref>). Consequently, in most cases LBVS are the only methods that can be applied because they do not require knowledge of the 3D structure of the target molecule (Hamza <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_016">2012</xref>).</p>
<p>LBVS methods are used to identify molecules in a database similar to a reference compound. To do so, they compare the structural and physicochemical properties (descriptors) of the reference molecule with those of compounds in the database, which may contain millions of compounds. Considering the latter, the computational efficiency of mathematical models that describe molecular descriptors is crucial. Despite the relatively low cost per evaluation, evaluating descriptors thousands of times for a single molecule, and subsequently for millions of molecules, can quickly become unaffordable in terms of time. Therefore, there is a need to prioritize descriptors that are computationally efficient to enable efficient screening of large numbers of molecules. Shape similarity has been identified as a descriptor of choice due to its ability to detect potential drug candidates that may have different chemical structures but similar shapes, which may mean that they exhibit similar biological activities (Carracedo-Reboredo <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_004">2021</xref>; Kumar and Zhang, <xref ref-type="bibr" rid="j_infor535_ref_022">2018</xref>), as well as its low computational cost. Consequently, shape similarity will be the descriptor used in this work to compare the quality of the different algorithms.</p>
<p>Finally, the flexibility of the molecules has to be also taken into account in LBVS problems (Rapaport, <xref ref-type="bibr" rid="j_infor535_ref_034">2004</xref>). Although literature works have mainly considered molecules as rigid objects, the reality is that molecules vary their interatomic distances and angles between atoms, giving rise to conformations, i.e. the same molecule with different interatomic distances and they potentially have different behaviours with other compounds and proteins. Consequently, flexibility must be taken into account when applying LBVS as it allows solutions to be found that would otherwise not be possible. The simplest example would be to find two identical molecules with different conformations: if conformations are not explored, no matter how good the search algorithm is, it will never find such a compound, or at least not with the desired percentage of similarity. On this basis, everything looks good for flexibility. However, the reason why it is not considered is that it increases computational calculations enormously as hundreds of different conformations can be generated from each molecule. To deal with this, filters are often applied to discard compounds before generating the conformations in order to avoid a large number of comparisons that would not return a promising result (Ellingson <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_011">2014</xref>; Poongavanam <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_030">2021</xref>). However, this is influenced by the quality of the filter, as compounds that a priori do not seem to be good candidates can be discarded. In this work, we are going to use the software OMEGA (Hawkins <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_017">2010</xref>) for the generation of conformations because of its widespread use in the literature and to facilitate future comparisons. 
Regarding the filters to discard compounds, we have included our own system, to be used as desired, in order not to consume too many computational resources.</p>
</sec>
<sec id="j_infor535_s_003">
<label>1.2</label>
<title>Related Works</title>
<p>Identifying compounds with similar shapes is a computationally demanding problem due to two main reasons: First, there is a vast number of molecules to analyse, up to millions. Secondly, finding the position of maximum overlap between every pair of molecules for the comparison to be descriptive is hard. Consequently, an exhaustive search is not feasible, and local search methods are frequent (Wang <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_045">2020</xref>; Ahmed <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_001">2018</xref>). Similarly, heuristics and meta-heuristics are often employed to achieve satisfactory solutions with reasonable computational effort (Lindfield and Penny, <xref ref-type="bibr" rid="j_infor535_ref_024">2017</xref>; Salhi, <xref ref-type="bibr" rid="j_infor535_ref_036">2017</xref>).</p>
<p>One of the most recent proposals among population-based meta-heuristics is OptiPharm (Puertas-Martín <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_031">2019</xref>; Puertas-Martín <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_032">2022</xref>). It offered several advantages over the state-of-the-art 3D alignment optimization methods ROCS (Software <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_038">2008</xref>) and WEGA (Yan <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_047">2013</xref>). Specifically, it outperformed them in the quality of solutions and execution time while also being highly configurable. An even more recent population-based algorithm is 2L-GO-Pharm (Ferrández <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_012">2022</xref>). It improved the quality of the OptiPharm solutions and reduced the number of function evaluations required. These methods were designed to be able to explore the entire search space both broadly and deeply, thus avoiding being confined to a local minimum. This feature is particularly useful for complex molecules with numerous degrees of freedom, as it allows for a comprehensive exploration of the search space.</p>
<p>Both OptiPharm and 2L-GO-Pharm are population-based algorithms that apply different techniques to a population to search for the optimal solution. OptiPharm uses the concept of species associated with a radius that decreases as the iterations progress (Jelasity <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_019">2001</xref>). In contrast, 2L-GO-Pharm uses a 2-level design in which the first one tries to detect solutions that have the potential to be local or global optima, and in the second level, these solutions are guided to the peaks. As population-based methods, they have high exploration capabilities (Lindfield and Penny, <xref ref-type="bibr" rid="j_infor535_ref_024">2017</xref>; Salhi, <xref ref-type="bibr" rid="j_infor535_ref_036">2017</xref>) and are intrinsically compatible with parallel computing (Boussaïd <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_003">2013</xref>; Sudholt, <xref ref-type="bibr" rid="j_infor535_ref_040">2015</xref>; Storn and Price, <xref ref-type="bibr" rid="j_infor535_ref_039">1997</xref>). On the other hand, they generally have multiple parameters to tune that significantly affect the search performance (Jones and Martins, <xref ref-type="bibr" rid="j_infor535_ref_020">2021</xref>; Rao <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_033">2012</xref>). Besides, they generally need numerous objective function evaluations to ensure remarkable and stable results (Costa and Nannicini, <xref ref-type="bibr" rid="j_infor535_ref_007">2018</xref>; Cruz <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_008">2022a</xref>). 
Accordingly, OptiPharm expects four parameters, and its robust configurations start from computational budgets of 200 000 (2L-GO-Pharm, 150 000) objective function evaluations (Puertas-Martín <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_031">2019</xref>), yet it can benefit from parallel computing (García <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_014">2023</xref>) as an evolutionary method.</p>
</sec>
<sec id="j_infor535_s_004">
<label>1.3</label>
<title>Contributions</title>
<p>The main contribution of this work is presenting the optimization algorithm Tangram CW. It is especially suitable for addressing shape similarity-based LBVS problems with rigid and flexible molecules. Nevertheless, the method is decoupled from the objective function and does not compute derivatives. Hence, it can be studied for different objective functions (problems) and can be classified as a black-box derivative-free optimizer (Costa and Nannicini, <xref ref-type="bibr" rid="j_infor535_ref_007">2018</xref>). The algorithm is a new version of the proposal made by the authors in Cruz <italic>et al.</italic> (<xref ref-type="bibr" rid="j_infor535_ref_009">2022b</xref>), which showed promising results with a reduced consumption of function evaluations. The changes, which make the algorithm very effective for the problem at hand, are related to the division of the search space and the definition of variables that wrap around their bounds. It only expects two parameters: the total number of function evaluations and the number consumed by the local search component each time it is run. They can be directly related to the exploration and exploitation facets of search methods (Jones and Martins, <xref ref-type="bibr" rid="j_infor535_ref_020">2021</xref>; Van Geit <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_044">2008</xref>), i.e. reaching new regions of the search space and obtaining the best point out of the known ones, respectively.</p>
<p>Another relevant contribution of this work is a knowledge-based filter of compounds. Other algorithms, such as OptiPharm, rely on pre-defined position vectors representing promising solutions. They mainly improve the quality of the solutions obtained but do not allow discarding any compound in advance. In other words, although every compound in the considered database will be compared to the query or reference one in these descriptive positions, most will differ significantly from the beginning. Accordingly, this work defines an optional component that ranks every compound at these positions and discards those exhibiting low values considering a user-given tolerance. This aspect can be critical when working with flexibility, as databases increase so much in size that explorations graze infeasibility despite parallel computing. This tool is separated from the proposed optimizer and can be used independently.</p>
<p>Finally, the problem-level parallelization, i.e. how compounds are accessed, is considered from the beginning of the design of the proposed solution. Again, it is independent of the optimizer and the compound filter. Focusing on this side simplifies the management of parallel hardware, ensures relevant workloads, and is independent of the parallelization capabilities of the chosen optimizer. The implemented software package is publicly available in Cruz <italic>et al.</italic> (<xref ref-type="bibr" rid="j_infor535_ref_010">2023</xref>).</p>
<p>The rest of the paper is structured as follows: Section <xref rid="j_infor535_s_005">2</xref> explains the proposed methodology from the compound positioning model and the objective function to the parallel database exploration workflow, the compound filter, and the designed optimizer. Section <xref rid="j_infor535_s_017">3</xref> describes the experimentation carried out to assess the proposal. Finally, Section <xref rid="j_infor535_s_023">4</xref> draws conclusions and proposes future work.</p>
</sec>
</sec>
<sec id="j_infor535_s_005" sec-type="materials|methods">
<label>2</label>
<title>Materials and Methods</title>
<p>This section describes the application framework of LBVS using the shape similarity metric and the proposed solution. Firstly, we define the Gaussian model used to evaluate the similarity between two molecules. After that, the section describes the parallel exploration of compound databases for rigid and flexible molecules, the optional knowledge-based filter, and the proposed optimizer.</p>
<sec id="j_infor535_s_006">
<label>2.1</label>
<title>Positioning Model</title>
<p>As introduced, comparing two molecules requires applying a rotation and translation to one of them. In this work, such modification is defined by 10 variables in total. The first 7 define the rotation and the last 3 the translation. The first group of parameters can be divided into three sub-groups: the first parameter defines the rotation angle, which is applied about the axis passing through the two 3D points defined by the following six parameters. Finally, the translation uses 3 parameters to be able to move the molecule along any axis.</p>
<p>These parameters are constrained to speed up the process and to avoid generating positions where there is no overlap. The rotation parameter is contained in the range <inline-formula id="j_infor535_ineq_001"><alternatives><mml:math>
<mml:mo fence="true" stretchy="false">[</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mi mathvariant="italic">π</mml:mi>
<mml:mo fence="true" stretchy="false">]</mml:mo></mml:math><tex-math><![CDATA[$[0,2\pi ]$]]></tex-math></alternatives></inline-formula>. The points defining the rotation axis are created inside the box containing the molecule to be rotated. Finally, the ranges for the translation parameters are calculated by taking the difference in size between the two molecules and keeping the larger value for each axis. For a more detailed description of the procedure, the reader is referred to the original paper (Puertas-Martín <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_031">2019</xref>).</p>
</sec>
<sec id="j_infor535_s_007">
<label>2.2</label>
<title>Shape Similarity Metric</title>
<p>The shape similarity between two compounds is calculated by obtaining the overlap between their atoms using the Gaussian model. This model is widely used in the literature for its trade-off between solution quality and performance, and it represents the density distribution function of each atom with a Gaussian function. It is used by other popular software such as ROCS (Software <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_038">2008</xref>), WEGA (Yan <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_047">2013</xref>), OptiPharm (Puertas-Martín <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_031">2019</xref>) and 2L-GO-Pharm (Ferrández <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_012">2022</xref>) in different versions.</p>
<p>To obtain the shape similarity between an <italic>A</italic> and a <italic>B</italic> molecule, we use the model defined in Yan <italic>et al.</italic> (<xref ref-type="bibr" rid="j_infor535_ref_047">2013</xref>) which incorporates a weight associated with each atom, thus improving the model. The similarity value is given by the following expression: 
<disp-formula id="j_infor535_eq_001">
<label>(1)</label><alternatives><mml:math display="block">
<mml:mtable displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="italic">V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">A</mml:mi>
<mml:mi mathvariant="italic">B</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">g</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>=</mml:mo>
<mml:munder>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mo largeop="true" movablelimits="false">∑</mml:mo></mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:mi mathvariant="italic">A</mml:mi>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mi mathvariant="italic">j</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:mi mathvariant="italic">B</mml:mi>
</mml:mrow>
</mml:munder>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">g</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ {V_{AB}^{g}}=\sum \limits_{i\in A,j\in B}{w_{i}}{w_{j}}{v_{ij}^{g}},\]]]></tex-math></alternatives>
</disp-formula> 
where <inline-formula id="j_infor535_ineq_002"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${w_{i}}$]]></tex-math></alternatives></inline-formula> and <inline-formula id="j_infor535_ineq_003"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${w_{j}}$]]></tex-math></alternatives></inline-formula> are weights corresponding to the atoms <italic>i</italic> and <italic>j</italic>, respectively. Those weights are computed using the following formula: 
<disp-formula id="j_infor535_eq_002">
<label>(2)</label><alternatives><mml:math display="block">
<mml:mtable displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo><mml:mstyle displaystyle="true">
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>+</mml:mo>
<mml:mi mathvariant="italic">k</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mo largeop="false" movablelimits="false">∑</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
<mml:mo stretchy="false">≠</mml:mo>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">g</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfrac>
</mml:mstyle>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ {w_{i}}=\frac{{v_{i}}}{{v_{i}}+k{\textstyle\sum _{j\ne i}}{v_{ij}^{g}}},\]]]></tex-math></alternatives>
</disp-formula> 
where <inline-formula id="j_infor535_ineq_004"><alternatives><mml:math>
<mml:mi mathvariant="italic">k</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>0.8665</mml:mn></mml:math><tex-math><![CDATA[$k=0.8665$]]></tex-math></alternatives></inline-formula> is a universal constant, and <inline-formula id="j_infor535_ineq_005"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${v_{i}}$]]></tex-math></alternatives></inline-formula> is the volume of the atom <italic>i</italic>, which is calculated using the volume of the sphere as in Yan <italic>et al.</italic> (<xref ref-type="bibr" rid="j_infor535_ref_047">2013</xref>), <inline-formula id="j_infor535_ineq_006"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo><mml:mstyle displaystyle="false">
<mml:mfrac>
<mml:mrow>
<mml:mn>4</mml:mn>
<mml:mi mathvariant="italic">π</mml:mi>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="italic">σ</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mstyle></mml:math><tex-math><![CDATA[${v_{i}}=\frac{4\pi {\sigma _{i}^{3}}}{3}$]]></tex-math></alternatives></inline-formula>, <inline-formula id="j_infor535_ineq_007"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">σ</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${\sigma _{i}}$]]></tex-math></alternatives></inline-formula> being the radius of the atom. Finally, <inline-formula id="j_infor535_ineq_008"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">g</mml:mi>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${v_{ij}^{g}}$]]></tex-math></alternatives></inline-formula> is a product of Gaussian functions: 
<disp-formula id="j_infor535_eq_003">
<label>(3)</label><alternatives><mml:math display="block">
<mml:mtable displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">g</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>=</mml:mo><mml:mstyle displaystyle="true">
<mml:mo largeop="true" movablelimits="false">∫</mml:mo></mml:mstyle>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">r</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">g</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">r</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mi mathvariant="italic">d</mml:mi><mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="italic">r</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">→</mml:mo></mml:mover>
<mml:mo>=</mml:mo><mml:mstyle displaystyle="true">
<mml:mo largeop="true" movablelimits="false">∫</mml:mo></mml:mstyle>
<mml:mi mathvariant="italic">p</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo mathvariant="normal" fence="true" maxsize="1.19em" minsize="1.19em">(</mml:mo><mml:mstyle displaystyle="false">
<mml:mfrac>
<mml:mrow>
<mml:mn>3</mml:mn>
<mml:mi mathvariant="italic">p</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">π</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal" stretchy="false">/</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:mn>4</mml:mn>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="italic">σ</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfrac>
</mml:mstyle>
<mml:mo mathvariant="normal" fence="true" maxsize="1.19em" minsize="1.19em">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo mathvariant="normal" stretchy="false">/</mml:mo>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msup>
<mml:msup>
<mml:mrow>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mo>−</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msup>
<mml:mi mathvariant="italic">p</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">e</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo mathvariant="normal" fence="true" maxsize="1.19em" minsize="1.19em">(</mml:mo><mml:mstyle displaystyle="false">
<mml:mfrac>
<mml:mrow>
<mml:mn>3</mml:mn>
<mml:mi mathvariant="italic">p</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">π</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal" stretchy="false">/</mml:mo>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:mn>4</mml:mn>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="italic">σ</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfrac>
</mml:mstyle>
<mml:mo mathvariant="normal" fence="true" maxsize="1.19em" minsize="1.19em">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
<mml:mo mathvariant="normal" stretchy="false">/</mml:mo>
<mml:mn>3</mml:mn>
</mml:mrow>
</mml:msup>
<mml:msup>
<mml:mrow>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="bold-italic">r</mml:mi>
<mml:mo>−</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="bold-italic">r</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="bold-italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msup>
<mml:mi mathvariant="italic">d</mml:mi><mml:mover accent="true">
<mml:mrow>
<mml:mi mathvariant="italic">r</mml:mi>
</mml:mrow>
<mml:mo stretchy="false">→</mml:mo></mml:mover>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ {v_{ij}^{g}}=\int {g_{i}}(r){g_{j}}(r)d\vec{r}=\int p{e^{-{\big(\frac{3p{\pi ^{1/2}}}{4{\sigma _{i}^{3}}}\big)^{2/3}}{(\boldsymbol{r}-{\boldsymbol{r}_{\boldsymbol{i}}})^{2}}}}p{e^{-{\big(\frac{3p{\pi ^{1/2}}}{4{\sigma _{j}^{3}}}\big)^{2/3}}{(\boldsymbol{r}-{\boldsymbol{r}_{\boldsymbol{j}}})^{2}}}}d\vec{r},\]]]></tex-math></alternatives>
</disp-formula> 
where <italic>p</italic> is a parameter controlling the softness of the Gaussian spheres, i.e. the height of the original Gaussian function, and <italic>σ</italic> is the radius of the atom. The values associated with these parameters are empirical values obtained from the original work (Yan <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_047">2013</xref>).</p>
<p>Note that the maximum value of the function in (<xref rid="j_infor535_eq_001">1</xref>) depends on the number of atoms of the analysed molecules. Consequently, these values must be normalized to compare the results. For this, a standard in the literature is to use the Tanimoto similarity (Cereto-Massagué <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_005">2015</xref>; Rogers and Tanimoto, <xref ref-type="bibr" rid="j_infor535_ref_035">1960</xref>), which returns a value in the range <inline-formula id="j_infor535_ineq_009"><alternatives><mml:math>
<mml:mo fence="true" stretchy="false">[</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo fence="true" stretchy="false">]</mml:mo></mml:math><tex-math><![CDATA[$[0,1]$]]></tex-math></alternatives></inline-formula>, where 0 means that there is no similarity between the two molecules, and 1 implies that the two molecules are identical. 
<disp-formula id="j_infor535_eq_004">
<label>(4)</label><alternatives><mml:math display="block">
<mml:mtable displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:mi mathvariant="italic">T</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">S</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo><mml:mstyle displaystyle="true">
<mml:mfrac>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="italic">V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">A</mml:mi>
<mml:mi mathvariant="italic">B</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">g</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="italic">V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">A</mml:mi>
<mml:mi mathvariant="italic">A</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">g</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>+</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="italic">V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">B</mml:mi>
<mml:mi mathvariant="italic">B</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">g</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:mo>−</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="italic">V</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">A</mml:mi>
<mml:mi mathvariant="italic">B</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">g</mml:mi>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:mfrac>
</mml:mstyle>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ T{c_{S}}=\frac{{V_{AB}^{g}}}{{V_{AA}^{g}}+{V_{BB}^{g}}-{V_{AB}^{g}}}.\]]]></tex-math></alternatives>
</disp-formula>
</p>
</sec>
<sec id="j_infor535_s_008">
<label>2.3</label>
<title>Database Exploration Procedure</title>
<sec id="j_infor535_s_009">
<label>2.3.1</label>
<title>Standard LBVS Search Process</title>
<p>In this context, one can define a search for similar compounds from the reference or query compound and the database to scan. Algorithm <xref rid="j_infor535_fig_001">1</xref> describes the main steps of a basic LBVS search process. Its fundamental parameters are the information of the reference compound (<italic>query</italic>) and the database to explore (<italic>database</italic>). In practical terms, the database refers to a directory containing a file with the details of every compound. Their identification depends on their file name. For example, one of the files in the dataset later used in the experimentation is ‘DB00014.mol2’. The third parameter defines the comparison criterion, i.e. the Tanimoto similarity (<inline-formula id="j_infor535_ineq_010"><alternatives><mml:math>
<mml:mi mathvariant="italic">T</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">S</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[$T{c_{S}}$]]></tex-math></alternatives></inline-formula>), which represents the function to maximize in optimization terms. Along with them, the process also expects the optimization method for putting every candidate compound in the most descriptive comparison position (<italic>optimizer</italic>), whose parameters are omitted for simplicity, and how many promising compounds to track (<italic>histoLength</italic>). The latter aspect is interesting because further considerations may promote some compounds over others, even with lower ranks. Hence, it is advisable to provide experts with multiple options.</p>
<p>The procedure starts with loading the information of the query compound at line 1, i.e. a matrix with the details of every atom (<italic>matQ</italic>). This is used to compute the weighting (<italic>wQ</italic>) and overlap (<italic>ovQ</italic>) factors at lines 2 and 3, respectively. As the query is fixed, there is no need to repeat the specific computations, which can be obtained once and stored. After that, at line 4, the query is sought and excluded from the database. Otherwise, any robust search will always return the query itself as its most similar compound. However, readers should note that omitting the self-exclusion allows testing the robustness of proposals, as they should find the same compound sought. The preliminary stage ends by initializing the ordered list that will contain the most similar compounds found.</p>
<fig id="j_infor535_fig_001">
<label>Algorithm 1</label>
<caption>
<p>Standard process for exploring a compound database:</p>
</caption>
<graphic xlink:href="infor535_g001.jpg"/>
</fig>
<p>The search, which is defined between lines 6 and 14, repeats the same process for every compound in the database. Specifically, it loads the matrix with the information of the atoms defining the current candidate (<italic>matC</italic>), which lets us compute its specific weighting (<inline-formula id="j_infor535_ineq_011"><alternatives><mml:math>
<mml:mi mathvariant="italic">w</mml:mi>
<mml:mi mathvariant="italic">C</mml:mi></mml:math><tex-math><![CDATA[$wC$]]></tex-math></alternatives></inline-formula>) and overlap (<italic>ovC</italic>) factors, at lines 7, 8, and 9, respectively. Along with the equivalent information from the query, they define the evaluation context for computing the Tanimoto similarity that the optimizer will try to maximize for every candidate compound during the search. Their explicit aggregation is shown at line 10, where the variable <italic>context</italic> is defined. After that, the bounds for every positioning variable are computed based on the limits in the coordinates found in the specific information of the query and candidate compounds. This computation is shown at line 11.</p>
<p>The critical and most computationally demanding part of every iteration of the search is at line 12. It launches the chosen optimizer to find the best comparison position of the candidate compound. This procedure will always try to find the position (<italic>pos</italic>) that results in the highest value (<italic>val</italic>), i.e. the Tanimoto similarity, <inline-formula id="j_infor535_ineq_012"><alternatives><mml:math>
<mml:mi mathvariant="italic">T</mml:mi>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">S</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[$T{c_{S}}$]]></tex-math></alternatives></inline-formula>. Its goal is to ensure that the ultimate selection of compounds is descriptive. At line 13, the solution found for the candidate compound, i.e. positioning vector and associated value, is considered for inclusion in the resulting list (<italic>foundCompounds</italic>). Every compound has an optimal position and value, but the latter can be very low. Thus, the ‘Append’ keeps in the list only the <italic>histoLength</italic> best ranked, which can be implemented as an ordered insertion. That list is finally returned at line 15 for the expert to further study the selection done.</p>
</sec>
<sec id="j_infor535_s_010">
<label>2.3.2</label>
<title>Parallelization Strategy</title>
<p>The search process described in Algorithm <xref rid="j_infor535_fig_001">1</xref> is mainly embarrassingly parallel (Trobec <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_043">2018</xref>). More specifically, the initialization stage is common and fixed. The search ultimately becomes a loop that takes every compound in the database and places it as well as possible using the optimizer. Positioning a compound, which is the most computationally demanding part, does not depend on the others. Hence, a parallel implementation of this search only needs to split the iterations of the loop into concurrent execution units.</p>
<p>Achieving this kind of parallelization is straightforward using tools offering a high level of abstraction, such as the ‘<italic>parallel for</italic>’ construct of the OpenMP API (Trobec <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_043">2018</xref>) and the ‘<italic>parfor</italic>’ loop of MATLAB (Cruz <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_008">2022a</xref>). The former also allows adjusting the scheduling to minimize load imbalance and idle execution units, as the iterations involving candidate compounds with numerous atoms take longer. Regardless, this aspect could be neglected assuming a uniform distribution of compound sizes in the database.</p>
<p>However, there is a critical point to consider for a proper parallel implementation in a shared-memory environment: The <inline-formula id="j_infor535_ineq_013"><alternatives><mml:math>
<mml:mtext mathvariant="italic">foundCompounds</mml:mtext></mml:math><tex-math><![CDATA[$\textit{foundCompounds}$]]></tex-math></alternatives></inline-formula> list is a shared variable that cannot be updated concurrently, or the result is unpredictable (race condition). This problem can be solved by defining a critical section around line 13, i.e. by ensuring that only one of the execution units tries to update the shared variable at any given time. This situation is frequent in parallel programming and well supported by tools. For example, OpenMP offers the ‘<italic>critical</italic>’ construct for this purpose.</p>
<p>Another option is to define a local version of <inline-formula id="j_infor535_ineq_014"><alternatives><mml:math>
<mml:mtext mathvariant="italic">foundCompounds</mml:mtext></mml:math><tex-math><![CDATA[$\textit{foundCompounds}$]]></tex-math></alternatives></inline-formula> for every execution unit. This approach requires combining these partial selections before returning the final one at line 15. The combination must be done sequentially, but its computational cost is negligible. It is also relevant to highlight that every local version of <inline-formula id="j_infor535_ineq_015"><alternatives><mml:math>
<mml:mtext mathvariant="italic">foundCompounds</mml:mtext></mml:math><tex-math><![CDATA[$\textit{foundCompounds}$]]></tex-math></alternatives></inline-formula> must have the same maximum size set to <inline-formula id="j_infor535_ineq_016"><alternatives><mml:math>
<mml:mtext mathvariant="italic">histoLength</mml:mtext></mml:math><tex-math><![CDATA[$\textit{histoLength}$]]></tex-math></alternatives></inline-formula>. Otherwise, if one carelessly divides the limit by the number of concurrent execution units, the final list is likely to differ from the sequential execution by omitting some promising intermediate results. The reason is that one of the execution units could find multiple sub-optimal compounds yet better ranked than the best ones seen by the others during their exploration. Thus, a shorter size limit could force them to remove these results from their partial lists.</p>
</sec>
<sec id="j_infor535_s_011">
<label>2.3.3</label>
<title>Modifications to Support Flexibility</title>
<p>As mentioned, Algorithm <xref rid="j_infor535_fig_001">1</xref> expects a database with a single file for every different compound. That situation occurs when working with rigid molecules, but it is incompatible with considering flexible molecules. Since they have bonds that can rotate, covering different positions involves generating multiple files per compound by rotating their flexible bonds by different angles. They are generated in advance, as a preliminary stage. Intuitively, it can be compared to storing multiple pictures of a person from different angles to find better coincidences with other people. This process, known as generating the conformations of molecules (Puertas-Martín <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_032">2022</xref>), is expected to improve the results of LBVS by avoiding overlooking some compounds in favour of others. However, it also results in multiple data files per compound. Fortunately, adapting the standard exploration procedure to support this situation is straightforward in practical terms.</p>
<p>Specifically, the previous example file ‘DB00014.mol2’ will now be translated into multiple files with the following naming structure: ‘DB00014_conf1.mol2’, ‘DB00014_conf2.mol2’, and so on, depending on the number of conformations. Rigid compounds will still have a single representing file, but flexible ones might result in a few tens or even hundreds. In this context, the modifications of Algorithm <xref rid="j_infor535_fig_001">1</xref> start with postponing lines 1 to 3, as there might not be a single reference to fix. It is also necessary to modify (generalize) the self-exclusion process at line 4. Instead, it should now scan the files defining the database and produce two lists: one with the compounds to compare the query with (<inline-formula id="j_infor535_ineq_017"><alternatives><mml:math>
<mml:mtext mathvariant="italic">filtered_database</mml:mtext></mml:math><tex-math><![CDATA[$\textit{filtered\_database}$]]></tex-math></alternatives></inline-formula>), which includes multiple conformations or ‘versions’ of some of them, and the other with the different conformations of the query (<inline-formula id="j_infor535_ineq_018"><alternatives><mml:math>
<mml:mtext mathvariant="italic">conformations</mml:mtext>
<mml:mtext>_</mml:mtext>
<mml:mtext mathvariant="italic">list</mml:mtext></mml:math><tex-math><![CDATA[$\textit{conformations}\text{\_}\textit{list}$]]></tex-math></alternatives></inline-formula>).</p>
<p>Regarding the <inline-formula id="j_infor535_ineq_019"><alternatives><mml:math>
<mml:mtext mathvariant="italic">foundCompounds</mml:mtext></mml:math><tex-math><![CDATA[$\textit{foundCompounds}$]]></tex-math></alternatives></inline-formula> variable at line 5 in the original algorithm, it can be technically maintained as a plain list. However, it is advisable to redefine it as a structure in which there will be a field for every possible (‘candidate’) query conformation in the <inline-formula id="j_infor535_ineq_020"><alternatives><mml:math>
<mml:mtext mathvariant="italic">conformations</mml:mtext>
<mml:mtext>_</mml:mtext>
<mml:mtext mathvariant="italic">list</mml:mtext></mml:math><tex-math><![CDATA[$\textit{conformations}\text{\_}\textit{list}$]]></tex-math></alternatives></inline-formula> previously defined. In that context, each field will contain the results of the compound search in the same format as the original method, but separating the query conformation used for each case. By proceeding this way, the results will provide the expert with more information. Regardless, their standard interpretation will simply suggest as the result the compound (referring to its particular conformation) with a better value for the objective function (also mentioning the conformation considered out of the <inline-formula id="j_infor535_ineq_021"><alternatives><mml:math>
<mml:mtext mathvariant="italic">conformations</mml:mtext>
<mml:mtext>_</mml:mtext>
<mml:mtext mathvariant="italic">list</mml:mtext></mml:math><tex-math><![CDATA[$\textit{conformations}\text{\_}\textit{list}$]]></tex-math></alternatives></inline-formula>).</p>
<p>After the previous modifications, the search part is modified to start with an external loop that simply changes the query conformation, i.e. <bold>for</bold> <inline-formula id="j_infor535_ineq_022"><alternatives><mml:math>
<mml:mtext mathvariant="italic">query</mml:mtext>
<mml:mtext>_</mml:mtext>
<mml:mtext mathvariant="italic">conformation</mml:mtext>
<mml:mo stretchy="false">∈</mml:mo>
<mml:mtext mathvariant="italic">conformations</mml:mtext>
<mml:mtext>_</mml:mtext>
<mml:mtext mathvariant="italic">list</mml:mtext></mml:math><tex-math><![CDATA[$\textit{query}\text{\_}\textit{conformation}\in \textit{conformations}\text{\_}\textit{list}$]]></tex-math></alternatives></inline-formula> <bold>do</bold>. Its body starts with the former lines 1 to 3, i.e. by fixing the current query. After that, the original search loop must be included (nested), and iterate through the compounds in <inline-formula id="j_infor535_ineq_023"><alternatives><mml:math>
<mml:mtext mathvariant="italic">filtered</mml:mtext>
<mml:mtext>_</mml:mtext>
<mml:mtext mathvariant="italic">database</mml:mtext></mml:math><tex-math><![CDATA[$\textit{filtered}\text{\_}\textit{database}$]]></tex-math></alternatives></inline-formula>. Its result now becomes one of the fields of the modified <inline-formula id="j_infor535_ineq_024"><alternatives><mml:math>
<mml:mtext mathvariant="italic">foundCompounds</mml:mtext></mml:math><tex-math><![CDATA[$\textit{foundCompounds}$]]></tex-math></alternatives></inline-formula> variable. Thus, the original process is mainly repeated but i) after being included in an outer-level iterative procedure that changes the (conformation) query, and ii) iterating through the <inline-formula id="j_infor535_ineq_025"><alternatives><mml:math>
<mml:mtext mathvariant="italic">filtered</mml:mtext>
<mml:mtext>_</mml:mtext>
<mml:mtext mathvariant="italic">database</mml:mtext></mml:math><tex-math><![CDATA[$\textit{filtered}\text{\_}\textit{database}$]]></tex-math></alternatives></inline-formula>, which does not contain any file for the query compound but might have multiple ones for each original compound.</p>
<p>Notice that the parallelization strategy can be directly imported to the inner loop, i.e. in the per-query search, as in the standard approach. It ensures a significant amount of work for every execution unit. Moreover, although some compounds may have hundreds of conformations, others may have tens or even just one. Hence, dividing the space of potential compounds seems a more sensible and scalable option.</p>
</sec>
</sec>
<sec id="j_infor535_s_012">
<label>2.4</label>
<title>Compound Filter</title>
<p>Whether using the standard search process or the one dealing with conformations, comparing the query compound to the rest of the database is computationally demanding. The reason is the effort made to find the most descriptive relative position between the query and every candidate, i.e. solving multiple optimization problems. However, although every optimizer will try to find the best position in every case, most will be useless in the end. For example, let us consider a database with 2 001 rigid compounds (i.e. 2 000 candidates once the query is excluded) and a computational budget of 200 000 objective function evaluations per comparison (positioning). Executing Algorithm <xref rid="j_infor535_fig_001">1</xref> for a particular query will take <inline-formula id="j_infor535_ineq_026"><alternatives><mml:math>
<mml:mn>2</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>000</mml:mn>
<mml:mo>×</mml:mo>
<mml:mn>200</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>000</mml:mn>
<mml:mo>=</mml:mo>
<mml:mn>4</mml:mn>
<mml:mo>×</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mn>10</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>8</mml:mn>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[$2\hspace{0.1667em}000\times 200\hspace{0.1667em}000=4\times {10^{8}}$]]></tex-math></alternatives></inline-formula> function evaluations, but only <inline-formula id="j_infor535_ineq_027"><alternatives><mml:math>
<mml:mtext mathvariant="italic">histoLength</mml:mtext></mml:math><tex-math><![CDATA[$\textit{histoLength}$]]></tex-math></alternatives></inline-formula> results will be taken. Moreover, if one registered every partial result, the final rank achieved by the optimizer in numerous candidate compounds would be very low and far from the best ones.</p>
<p>In this context, it would be useful to discard (ignore from the database) those compounds having very low probabilities of matching the query. In terms of Chemistry, it could be possible to define a preliminary filter considering, for instance, the number of atoms defining the compound. However, generic criteria may significantly diverge from the particular magnitude of interest (e.g., Tanimoto’s shape similarity). It also implies studying other aspects. For this reason, this work proposes to use the same objective function to identify those compounds whose preliminary assessments are so different from the best ones that it seems logical to ignore them.</p>
<p>Unfortunately, computing the objective function involves defining a relative position between the query and candidate compound, i.e. relying on an initial solution for the corresponding optimization problem. However, it would not make sense to solve the positioning problem in order to avoid doing so. Besides, some of the global optimization methods used for the target problem, such as OptiPharm and the proposed Tangram algorithm, are not deterministic. Hence, using them to discard options doubles the uncertainty, and their choice would be virtually random without investing a significant number of objective function evaluations. Aside from these inconveniences, stacking complete optimizers makes tuning the search harder. Therefore, the proposal of this work is to preliminarily rank compounds after considering a very reduced set of descriptive pre-defined positions.</p>
<p>Specifically, the proposed filter maintains the scheme of Algorithm <xref rid="j_infor535_fig_001">1</xref> with two main modifications. The first is replacing the call to an external optimizer by directly studying four pre-defined solutions, i.e. positioning vectors, for the query and every candidate (potentially filtered) compound. At this point, the value of every compound is kept, so the previous ‘Append’ function limiting the records to <inline-formula id="j_infor535_ineq_028"><alternatives><mml:math>
<mml:mtext mathvariant="italic">histoLength</mml:mtext></mml:math><tex-math><![CDATA[$\textit{histoLength}$]]></tex-math></alternatives></inline-formula> cases is not needed. The value assigned to every candidate compound is the maximum seen considering the referred four positions. Some readers might wonder why not to use the average, but preliminary experimentation demonstrated that it was more effective to register the best. In the end, it is a best-effort approach: if some of the four positions are particularly bad for an otherwise promising candidate compound, averaging would inappropriately degrade its rank. Regarding the four positions, as introduced, they are the ones used by OptiPharm to initialize its population: no movement, and an exclusive rotation of 180<sup>∘</sup> about the X, Y, and Z axes, respectively. As detailed in Puertas-Martín <italic>et al.</italic> (<xref ref-type="bibr" rid="j_infor535_ref_031">2019</xref>), these four positions refer to the most descriptive parts of the search space.</p>
<p>The second and last change represents the real filtering procedure. More specifically, after having preliminarily explored the database and recorded the maximum value for every compound at one of the four initial positions, it is necessary to select a subset of them. The proposed filter offers two options for this purpose depending on a single parameter, <inline-formula id="j_infor535_ineq_029"><alternatives><mml:math>
<mml:mtext mathvariant="italic">qnt</mml:mtext></mml:math><tex-math><![CDATA[$\textit{qnt}$]]></tex-math></alternatives></inline-formula> (from quantity). If <inline-formula id="j_infor535_ineq_030"><alternatives><mml:math>
<mml:mtext mathvariant="italic">qnt</mml:mtext></mml:math><tex-math><![CDATA[$\textit{qnt}$]]></tex-math></alternatives></inline-formula> is an integer greater than 1 (logically, without exceeding the number of available compounds), the filter sorts the preliminary values and selects the best <inline-formula id="j_infor535_ineq_031"><alternatives><mml:math>
<mml:mtext mathvariant="italic">qnt</mml:mtext></mml:math><tex-math><![CDATA[$\textit{qnt}$]]></tex-math></alternatives></inline-formula>. Conversely, if <inline-formula id="j_infor535_ineq_032"><alternatives><mml:math>
<mml:mtext mathvariant="italic">qnt</mml:mtext></mml:math><tex-math><![CDATA[$\textit{qnt}$]]></tex-math></alternatives></inline-formula> is a value in the range <inline-formula id="j_infor535_ineq_033"><alternatives><mml:math>
<mml:mo fence="true" stretchy="false">[</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo stretchy="false">⊂</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi></mml:math><tex-math><![CDATA[$[0,1)\subset \mathbb{R}$]]></tex-math></alternatives></inline-formula>, the filter sets the best-ranked compound as the reference, <inline-formula id="j_infor535_ineq_034"><alternatives><mml:math>
<mml:mtext mathvariant="italic">best</mml:mtext>
<mml:mtext>_</mml:mtext>
<mml:mtext mathvariant="italic">prel</mml:mtext>
<mml:mtext>_</mml:mtext>
<mml:mtext mathvariant="italic">val</mml:mtext></mml:math><tex-math><![CDATA[$\textit{best}\text{\_}\textit{prel}\text{\_}\textit{val}$]]></tex-math></alternatives></inline-formula>. Then, it picks those whose preliminary value is worse than the best up to a degradation percentage <inline-formula id="j_infor535_ineq_035"><alternatives><mml:math>
<mml:mtext mathvariant="italic">qnt</mml:mtext></mml:math><tex-math><![CDATA[$\textit{qnt}$]]></tex-math></alternatives></inline-formula>, i.e. the compounds valued equal to or greater than <inline-formula id="j_infor535_ineq_036"><alternatives><mml:math>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>−</mml:mo>
<mml:mtext mathvariant="italic">qnt</mml:mtext>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>∗</mml:mo>
<mml:mtext mathvariant="italic">best</mml:mtext>
<mml:mtext>_</mml:mtext>
<mml:mtext mathvariant="italic">prel</mml:mtext>
<mml:mtext>_</mml:mtext>
<mml:mtext mathvariant="italic">val</mml:mtext></mml:math><tex-math><![CDATA[$(1-\textit{qnt})\ast \textit{best}\text{\_}\textit{prel}\text{\_}\textit{val}$]]></tex-math></alternatives></inline-formula>.</p>
<p>The described procedure should be launched to explore and reduce the size of the input database after removing the query or reference compound and before starting the complete (optimization-based) search, e.g. between lines 4 and 5 of Algorithm <xref rid="j_infor535_fig_001">1</xref>. This filter will only execute four objective function evaluations per compound and is also compatible with parallel computing. Similar to Algorithm <xref rid="j_infor535_fig_001">1</xref>, the most direct approach is to parallelize the loop focused on assessing every compound, i.e. independently computing the preliminary values and storing them at the corresponding indices.</p>
</sec>
<sec id="j_infor535_s_013">
<label>2.5</label>
<title>Tangram CW</title>
<sec id="j_infor535_s_014">
<label>2.5.1</label>
<title>Background</title>
<p>The Tangram algorithm presented in Cruz <italic>et al.</italic> (<xref ref-type="bibr" rid="j_infor535_ref_009">2022b</xref>) is a black-box minimization meta-heuristic defining a reduced set of exploration rules, using (but not linked to) the SASS stochastic hill-climber for local optimization. Tangram, which expects two parameters at most, requires a normalized search space in which every search or decision variable is in the range <inline-formula id="j_infor535_ineq_037"><alternatives><mml:math>
<mml:mo fence="true" stretchy="false">[</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo fence="true" stretchy="false">]</mml:mo></mml:math><tex-math><![CDATA[$[0,1]$]]></tex-math></alternatives></inline-formula>. Normalization simplifies implementation and avoids issues with variables of different scales (Snyman and Wilke, <xref ref-type="bibr" rid="j_infor535_ref_037">2005</xref>). Thus, it sees the following target problem: 
<disp-formula id="j_infor535_eq_005">
<label>(5)</label><alternatives><mml:math display="block">
<mml:mtable displaystyle="true" columnalign="right">
<mml:mtr>
<mml:mtd class="align-odd">
<mml:mtable displaystyle="true" columnspacing="0pt 10pt 0pt" columnalign="right left right left">
<mml:mtr>
<mml:mtd/>
<mml:mtd>
<mml:munder>
<mml:mrow>
<mml:mtext>minimize</mml:mtext>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">x</mml:mi>
</mml:mrow>
</mml:munder>
</mml:mtd>
<mml:mtd/>
<mml:mtd>
<mml:mi mathvariant="italic">f</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd/>
<mml:mtd>
<mml:mtext>subject to</mml:mtext>
</mml:mtd>
<mml:mtd/>
<mml:mtd>
<mml:mn>0</mml:mn>
<mml:mo>⩽</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>⩽</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="2.5pt"/>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mo>…</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mi mathvariant="italic">N</mml:mi>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ \begin{array}{r@{\hskip0pt}l@{\hskip10pt}r@{\hskip0pt}l}& \displaystyle \underset{x}{\text{minimize}}& & \displaystyle f(x)\\ {} & \displaystyle \text{subject to}& & \displaystyle 0\leqslant {x_{i}}\leqslant 1,\hspace{2.5pt}i=1,\dots ,N.\end{array}\]]]></tex-math></alternatives>
</disp-formula> 
where <italic>f</italic> is an <italic>N</italic>-dimensional objective function, i.e. <inline-formula id="j_infor535_ineq_038"><alternatives><mml:math>
<mml:mi mathvariant="italic">f</mml:mi>
<mml:mo>:</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo fence="true" stretchy="false">[</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo fence="true" stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">N</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="false">→</mml:mo>
<mml:mi mathvariant="double-struck">R</mml:mi></mml:math><tex-math><![CDATA[$f:{[0,1]^{N}}\to \mathbb{R}$]]></tex-math></alternatives></inline-formula>. The term <italic>x</italic> refers to any input in <inline-formula id="j_infor535_ineq_039"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mo fence="true" stretchy="false">[</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo fence="true" stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">N</mml:mi>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${[0,1]^{N}}$]]></tex-math></alternatives></inline-formula> belonging to the <italic>N</italic>-dimensional unit hypercube <inline-formula id="j_infor535_ineq_040"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mo fence="true" stretchy="false">[</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo fence="true" stretchy="false">]</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">N</mml:mi>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${[0,1]^{N}}$]]></tex-math></alternatives></inline-formula>, which defines the search space. As the problem definition only consists of variable bounds and an objective function with unknown mathematical formulation and properties, it can be classified as a black-box optimization with box constraints (Costa and Nannicini, <xref ref-type="bibr" rid="j_infor535_ref_007">2018</xref>; Jones and Martins, <xref ref-type="bibr" rid="j_infor535_ref_020">2021</xref>).</p>
<p>In this context, Tangram starts by evaluating the centre of the hypercube, which becomes the current result. The method also decides to launch its standard mode or its incisive one. The former consists of three consecutive stages, global, division, and local, in a loop that ends after consuming all the evaluations. The global stage launches the local search from the current result and makes the maximum step size to cover the whole search space, which changes the current solution to a new one every time a better candidate solution is found. The division stage computes and evaluates the midpoint between the current solution and each corner of the search space. After that, the local stage launches the local search from these midpoints, starting with the best ranked, just in case the evaluation budget runs out before the stage ends. The main loop body ends by replacing the current solution with the best point reached during the local phase if any of them outperforms it. The incisive mode is mainly the same but merges the division and local stages. More specifically, it launches the local search from every midpoint immediately after having computed it, which makes it impossible to prioritize them but ensures local-sharpened results when the total evaluation budget is relatively low. For this reason, the incisive mode is only activated when the total number of function evaluations is lower than the number of corners, i.e. <inline-formula id="j_infor535_ineq_041"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">N</mml:mi>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${2^{N}}$]]></tex-math></alternatives></inline-formula>, plus the evaluations consumed with the centre and the first global stage.</p>
<p>The results achieved by Tangram with the benchmarks proposed in Costa and Nannicini (<xref ref-type="bibr" rid="j_infor535_ref_007">2018</xref>) for very low computational budgets were competitive. Considering them, along with the common roots and local search component, Tangram was also used to replicate the results of OptiPharm in Puertas-Martín <italic>et al.</italic> (<xref ref-type="bibr" rid="j_infor535_ref_031">2019</xref>) at shape similarity LBVS. However, these preliminary results were not competitive, and there was room for improvement.</p>
<p>As the objective function is fast to compute, the number of allowed evaluations can be significantly greater than expected when designing the original method. Despite this possible increase in the computational budget, the problem dimensionality results in <inline-formula id="j_infor535_ineq_042"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mn>10</mml:mn>
</mml:mrow>
</mml:msup>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>024</mml:mn></mml:math><tex-math><![CDATA[${2^{10}}=1\hspace{0.1667em}024$]]></tex-math></alternatives></inline-formula> corners, which still makes it hard for the standard method to complete an iteration, even with the standard budget of 32 evaluations per local search proposed in Cruz <italic>et al.</italic> (<xref ref-type="bibr" rid="j_infor535_ref_009">2022b</xref>). Related to this, the search space of this real-world problem seems to require more effort from the local search component than plain benchmarks, but increasing it further complicates completing stages of the standard Tangram.</p>
<p>Hence, the proposed version of Tangram will launch the local search from the centroid of every region defined by the current best solution and its nearest corners. There will be <inline-formula id="j_infor535_ineq_043"><alternatives><mml:math>
<mml:mn>2</mml:mn>
<mml:mi mathvariant="italic">N</mml:mi></mml:math><tex-math><![CDATA[$2N$]]></tex-math></alternatives></inline-formula> centroids, i.e. 20 in the considered context, which dramatically reduces the effort of the division and local stages and increases the scalability of the solver. This modification, which gives the modified optimizer the C (from centroid) in its name, will also allow providing the local search component with more evaluations. Finally, local searches might reach the bound of variables, and the standard approach is to saturate exceeding variables. For instance, if SASS shifts a certain variable from 4.9 to 5.2 and its bound is 5.0, its value will be fixed to 5.0. For regular variables, such as Euclidean distances, it seems reasonable to just stick to the problem-specific bounds. However, for those representing angles, as it occurs with some in the shape similarity LBVS positioning model, it also seems sound to wrap around the limits as angles do. For example, 355<sup>∘</sup> is near 5<sup>∘</sup> in the angular space, and if the local search component considers it beneficial to move in that direction, allowing that movement looks interesting. This enhanced flexibility, which applies to any problem with angular variables and was introduced by Ferrández <italic>et al.</italic> (<xref ref-type="bibr" rid="j_infor535_ref_012">2022</xref>) in this context, gives the modified optimizer the W (from wrap around) in its name.</p>
</sec>
<sec id="j_infor535_s_015">
<label>2.5.2</label>
<title>Workflow of Tangram CW</title>
<p>Tangram CW, which inherits the background from its ancestor, follows Algorithm <xref rid="j_infor535_fig_002">2</xref>. The underlying process remains the same as outlined in the previous section for the original Tangram. Thus, the algorithm takes the centre of the search space as the initial solution and then alternates a global stage with either i) a division and a local search phase (standard mode) or ii) a combination of both (incisive mode). For conciseness, let us focus on the differences with the original algorithm.</p>
<fig id="j_infor535_fig_002">
<label>Algorithm 2</label>
<caption>
<p>Tangram CW</p>
</caption>
<graphic xlink:href="infor535_g002.jpg"/>
</fig>
<fig id="j_infor535_fig_003">
<label>Algorithm 3</label>
<caption>
<p>get_Facets_Of_Hypercube</p>
</caption>
<graphic xlink:href="infor535_g003.jpg"/>
</fig>
<p>Firstly, the number of function evaluations that every local search takes is explicitly considered a parameter to tune, i.e. <inline-formula id="j_infor535_ineq_044"><alternatives><mml:math>
<mml:mtext mathvariant="italic">localEvals</mml:mtext></mml:math><tex-math><![CDATA[$\textit{localEvals}$]]></tex-math></alternatives></inline-formula>. Related to this, <inline-formula id="j_infor535_ineq_045"><alternatives><mml:math>
<mml:mtext mathvariant="italic">wraps</mml:mtext></mml:math><tex-math><![CDATA[$\textit{wraps}$]]></tex-math></alternatives></inline-formula> is also a novelty. It is a vector with the indices of the variables linked to angles, i.e. those to wrap around if needed. However, <inline-formula id="j_infor535_ineq_046"><alternatives><mml:math>
<mml:mtext mathvariant="italic">wraps</mml:mtext></mml:math><tex-math><![CDATA[$\textit{wraps}$]]></tex-math></alternatives></inline-formula> should not be considered a parameter to tune but context information like the objective function <italic>f</italic>. Considering the described positioning model, <inline-formula id="j_infor535_ineq_047"><alternatives><mml:math>
<mml:mtext mathvariant="italic">wraps</mml:mtext></mml:math><tex-math><![CDATA[$\textit{wraps}$]]></tex-math></alternatives></inline-formula> will be <inline-formula id="j_infor535_ineq_048"><alternatives><mml:math>
<mml:mo fence="true" stretchy="false">[</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo fence="true" stretchy="false">]</mml:mo></mml:math><tex-math><![CDATA[$[1]$]]></tex-math></alternatives></inline-formula> at experimentation, as the only angle is the first variable.</p>
<p>Secondly, at line 1, the statement <inline-formula id="j_infor535_ineq_049"><alternatives><mml:math>
<mml:mtext mathvariant="italic">get</mml:mtext>
<mml:mtext>_</mml:mtext>
<mml:mtext mathvariant="italic">Facets</mml:mtext>
<mml:mtext>_</mml:mtext>
<mml:mtext mathvariant="italic">Of</mml:mtext>
<mml:mtext>_</mml:mtext>
<mml:mtext mathvariant="italic">Hypercube</mml:mtext></mml:math><tex-math><![CDATA[$\textit{get}\text{\_}\textit{Facets}\text{\_}\textit{Of}\text{\_}\textit{Hypercube}$]]></tex-math></alternatives></inline-formula> differs from the original computation of the corners of the search space, i.e. <inline-formula id="j_infor535_ineq_050"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">N</mml:mi>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${2^{N}}$]]></tex-math></alternatives></inline-formula> points. The new one computes the sets of points that will be used to calculate the centroid of every search region, along with the current result, at the division stage. Algorithm <xref rid="j_infor535_fig_003">3</xref> computationally describes how to obtain each point set for a normalized search space of <italic>N</italic> dimensions. As noted, there will be <inline-formula id="j_infor535_ineq_051"><alternatives><mml:math>
<mml:mn>2</mml:mn>
<mml:mi mathvariant="italic">N</mml:mi></mml:math><tex-math><![CDATA[$2N$]]></tex-math></alternatives></inline-formula> sets of points, and each will internally consist of <inline-formula id="j_infor535_ineq_052"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">N</mml:mi>
<mml:mo>−</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${2^{N-1}}$]]></tex-math></alternatives></inline-formula> elements that are always different corners of the unit hypercube. Every corner in the same set shares a common dimension at least, and they form the facets. Figure <xref rid="j_infor535_fig_004">1</xref> shows the sets of points for <inline-formula id="j_infor535_ineq_053"><alternatives><mml:math>
<mml:mi mathvariant="italic">N</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>3</mml:mn></mml:math><tex-math><![CDATA[$N=1,2,3$]]></tex-math></alternatives></inline-formula>. Figure <xref rid="j_infor535_fig_005">2</xref> depicts how the groups of selected corners define the facets to compute the centroids of interest in a 2D search space. It implicitly represents the division stage of the method. Algorithm <xref rid="j_infor535_fig_002">2</xref> uses the sets of facets at lines 12, 16, and 20 with the current result to compute the centroid of the corresponding region.</p>
<fig id="j_infor535_fig_004">
<label>Fig. 1</label>
<caption>
<p>Set of facets for computing the division centroids when <inline-formula id="j_infor535_ineq_054"><alternatives><mml:math>
<mml:mi mathvariant="italic">N</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>3</mml:mn></mml:math><tex-math><![CDATA[$N=1,2,3$]]></tex-math></alternatives></inline-formula>.</p>
</caption>
<graphic xlink:href="infor535_g004.jpg"/>
</fig>
<fig id="j_infor535_fig_005">
<label>Fig. 2</label>
<caption>
<p>Corners used for computing each centroid in a 2D search space (Division stage).</p>
</caption>
<graphic xlink:href="infor535_g005.jpg"/>
</fig>
<p>Thirdly, the mode selection from lines 3 to 6 in Algorithm <xref rid="j_infor535_fig_002">2</xref> is conceptually equivalent to that in the original method. However, it takes into account that the new algorithm takes <inline-formula id="j_infor535_ineq_055"><alternatives><mml:math>
<mml:mtext mathvariant="italic">localEvals</mml:mtext></mml:math><tex-math><![CDATA[$\textit{localEvals}$]]></tex-math></alternatives></inline-formula> per execution of the local search component, and there will be <inline-formula id="j_infor535_ineq_056"><alternatives><mml:math>
<mml:mn>2</mml:mn>
<mml:mi mathvariant="italic">N</mml:mi></mml:math><tex-math><![CDATA[$2N$]]></tex-math></alternatives></inline-formula> division points, i.e. centroids. Considering these aspects, the underlying idea remains the same: launch the incisive mode only if the budget of function evaluations would be fully consumed after assessing the centre of the search space, the first global stage, and the different division points. In other words, when no local search will be executed for the input computational budget.</p>
<p>Fourthly and lastly, the global stage at line 8 keeps the maximum step (radius) of the local search component equal to the diameter of the search space, as in the original Tangram algorithm. However, this radius is set to the distance between the centroid of every region and the current solution at local phases (lines 16 and 21). Conversely, the original Tangram method would have set this value to the distance between the current solution and the midpoint between it and the corner involved. Regardless of the alias ‘global’ or ‘local’, the calls at lines 8, 16, and 21 in Algorithm <xref rid="j_infor535_fig_002">2</xref> refer to the local search component, SASS. This method is not described due to space limitations, but the interested reader can find a detailed explanation in Lančinskas <italic>et al.</italic> (<xref ref-type="bibr" rid="j_infor535_ref_023">2013</xref>) and Cruz <italic>et al.</italic> (<xref ref-type="bibr" rid="j_infor535_ref_009">2022b</xref>). That said, bound checking must be modified to make the variables indexed in <inline-formula id="j_infor535_ineq_057"><alternatives><mml:math>
<mml:mtext mathvariant="italic">wraps</mml:mtext></mml:math><tex-math><![CDATA[$\textit{wraps}$]]></tex-math></alternatives></inline-formula> wrap around their opposite limits, which is trivial.</p>
</sec>
<sec id="j_infor535_s_016">
<label>2.5.3</label>
<title>Final Remarks</title>
<p>The previous explanation of Tangram CW is mainly problem-independent. Adapting the problem-specific objective function defined in (<xref rid="j_infor535_eq_004">4</xref>) is straightforward. It is only necessary to normalize the ten input variables so that the function domain becomes the 10-dimensional unit hypercube. However, as that function returns the overlap degree between the query and the studied compound, the optimizer should try to maximize it. Tangram CW, like the original method (and most optimization algorithms), is described in terms of minimization. Fortunately, converting a maximization problem into a minimization one is trivial. It is only necessary to multiply the objective function by −1, i.e. maximizing <italic>f</italic> is equivalent to minimizing <inline-formula id="j_infor535_ineq_058"><alternatives><mml:math>
<mml:mo>−</mml:mo>
<mml:mi mathvariant="italic">f</mml:mi></mml:math><tex-math><![CDATA[$-f$]]></tex-math></alternatives></inline-formula> (Cruz <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_008">2022a</xref>).</p>
<p>It is also relevant to highlight that the incisive mode of Tangram CW is not likely to be used for this problem. However, it is defined for generality, as it addresses a potentially unwanted situation, as in the original method. Other applications of Tangram CW in which very few function evaluations are allowed could benefit from it.</p>
</sec>
</sec>
</sec>
<sec id="j_infor535_s_017">
<label>3</label>
<title>Experimentation and Results</title>
<p>This section starts by explaining the implementation of the proposed solution. The description covers both software and hardware. After that, it presents the dataset used for the different experiments. Specifically, the experimentation initially compares the performance of the proposed optimizer, Tangram CW, to the state-of-the-art global methods for shape similarity-based screening OptiPharm and 2L-GO-Pharm. The comparison replicates the benchmarks defined by those methods for rigid compounds and focuses on reducing the computational budget. In this context, the second experimentation stage studies the effectiveness of the proposed filtering strategy to discard unwanted compounds while keeping the expected ones. The third and last experimentation phase analyses the effectiveness of the proposed optimizer and compound filter when considering the flexibility of the compounds in the dataset, which defines the most challenging situation. Aside from describing how the proposal performs, the study also discusses the general benefits of considering flexibility despite the practical difficulties, as it is only possible when relying on highly efficient methods.</p>
<sec id="j_infor535_s_018">
<label>3.1</label>
<title>Implementation and Hardware Setup</title>
<p>The proposed solution has been implemented in MATLAB (<xref ref-type="bibr" rid="j_infor535_ref_026">2018</xref>) and C through the MEX API to accelerate the most computationally demanding parts (Getreuer, <xref ref-type="bibr" rid="j_infor535_ref_015">2010</xref>; The MathWorks Inc., <xref ref-type="bibr" rid="j_infor535_ref_042">2022</xref>). Specifically, the parallel database exploration processes, the compound filter, and the optimizer are written in MATLAB. Conversely, those linked to the objective function calculation are written in C and compiled as MATLAB Executable files through the MEX API. This way, the overall procedure is wrapped into the MATLAB environment. As a high-level-of-abstraction language, its use results in concise and maintainable code that is easy to modify, especially considering the numerous toolboxes developed for MATLAB (Cruz <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_008">2022a</xref>). At the same time, the most computationally intensive part remains written in C and compiled for the architecture to run more efficiently. To illustrate the effectiveness of this approach, in preliminary experimentation, the same virtual screening process was accelerated by a factor of 4.11 after replacing the initial MATLAB version of the objective function with the C-MEX one in the development workstation. The implemented software package is publicly available in Cruz <italic>et al.</italic> (<xref ref-type="bibr" rid="j_infor535_ref_010">2023</xref>).</p>
<p>Regarding the hardware used, the development workstation features an Intel Core i7 processor with 4 physical cores and 32 GB of RAM running Xubuntu 18.04. Aside from development purposes, this machine has also been used for the experiments with rigid compounds. However, for the virtual screening processes dealing with flexible compounds through the generation of multiple conformations, a node of the cluster of the Supercomputing – Algorithms research group from the University of Almería, Spain, is used. It has 2 AMD EPYC Rome 7642 processors with 48 cores each (96 in total) and 512 GB of RAM.</p>
</sec>
<sec id="j_infor535_s_019">
<label>3.2</label>
<title>Food and Drug Administration (FDA) Database</title>
<p>The Food and Drug Administration (FDA) (Ciociola <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_006">2014</xref>) is a federal agency of the United States Department of Health and Human Services. It is responsible for safeguarding and improving public health through the regulation of prescription and over-the-counter medications. Among the resources that they offer publicly, there is a dataset containing 1 751 molecules representing safe and approved drugs for use in humans in the USA. In the current context, it is customary to identify compound pairs in the FDA database that exhibit a high level of similarity. The background to this is that finding new compounds can be a valuable approach to drug discovery, as it can potentially lead to a more effective, safer, and more efficient development of new treatments (Wishart, <xref ref-type="bibr" rid="j_infor535_ref_046">2006</xref>).</p>
<p>Additionally, the selection of these compounds was used to test the software with flexible compounds. For this purpose, the software OMEGA (Hawkins <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_017">2010</xref>) was used with the default configuration and the maximum number of conformations was set at 500. Consequently, a novel database of 279 756 conformations was generated from the original 1751 through this process.</p>
</sec>
<sec id="j_infor535_s_020">
<label>3.3</label>
<title>Battery of Searches for Rigid Compounds</title>
<p>The performance of the proposal has been first studied by replicating one of the most descriptive tests considered when introducing OptiPharm and 2L-GO-Pharm. Namely, the benchmark consists of 40 query molecules from the FDA database, without flexibility, and considering hydrogen atoms. The latter aspect is highly relevant because some state-of-the-art tools, such as WEGA, omit hydrogen atoms during the search to accelerate the process, yet it may affect results. Nevertheless, OptiPharm achieved competitive results considering them. The optimizer used its robust configuration, which provides the method with a computational budget of 200 000 (200k) objective function evaluations for every positioning procedure. The reader can find more details about these results in Puertas-Martín <italic>et al.</italic> (<xref ref-type="bibr" rid="j_infor535_ref_031">2019</xref>) (Table 5). Later, 2L-GO-Pharm obtained comparable results after reducing the computational budget to 150k (Ferrández <italic>et al.</italic>, <xref ref-type="bibr" rid="j_infor535_ref_012">2022</xref>). They define the ground truth for the proposal, i.e. Tangram CW with and without compound filtering (and supported by parallel computing at both levels).</p>
<p>The local search method, SASS, uses its default configuration as suggested in Cruz <italic>et al.</italic> (<xref ref-type="bibr" rid="j_infor535_ref_009">2022b</xref>) and as also done by OptiPharm. The configuration of Tangram CW was adjusted after preliminary experimentation letting it use from 5k to 20k objective function evaluations and local budgets ranging from 32 to 256. The first problem dimension, i.e. the only angular rotation, was finally set to wrap around at bounds. The selected configuration defines 20k function evaluations, and every local search takes 128. Hence, Tangram CW will work with 10% and 13.33% of the computational budgets of OptiPharm and 2L-GO-Pharm, respectively.</p>
<table-wrap id="j_infor535_tab_001">
<label>Table 1</label>
<caption>
<p>Results of Tangram CW (20k evaluations) compared to OptiPharm (200k evaluations) and 2L-GO-Pharm (150k evaluations) when searching for 40 rigid compounds.</p>
</caption>
<table>
<thead>
<tr>
<td style="vertical-align: middle; text-align: left; border-top: solid thin"/>
<td style="vertical-align: middle; text-align: left; border-top: solid thin"/>
<td style="vertical-align: middle; text-align: left; border-top: solid thin"/>
<td colspan="2" style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">OptiPharm/2L-GO-Pharm</td>
<td colspan="2" style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Tangram CW</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">ID</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Query</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Atoms</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Found C.</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Value</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Found C.</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Value</td>
</tr>
</thead>
<tbody>
<tr>
<td style="vertical-align: top; text-align: left">1</td>
<td style="vertical-align: top; text-align: left">DB00529</td>
<td style="vertical-align: top; text-align: left">10</td>
<td style="vertical-align: top; text-align: left">DB09294</td>
<td style="vertical-align: top; text-align: left">0.87</td>
<td style="vertical-align: top; text-align: left"><bold>DB09147</bold></td>
<td style="vertical-align: top; text-align: left">0.87</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">2</td>
<td style="vertical-align: top; text-align: left">DB00331</td>
<td style="vertical-align: top; text-align: left">20</td>
<td style="vertical-align: top; text-align: left">DB09210</td>
<td style="vertical-align: top; text-align: left">0.86</td>
<td style="vertical-align: top; text-align: left">DB09210</td>
<td style="vertical-align: top; text-align: left">0.86</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">3</td>
<td style="vertical-align: top; text-align: left">DB01352</td>
<td style="vertical-align: top; text-align: left">29</td>
<td style="vertical-align: top; text-align: left">DB00306</td>
<td style="vertical-align: top; text-align: left">0.89</td>
<td style="vertical-align: top; text-align: left">DB00306</td>
<td style="vertical-align: top; text-align: left">0.89</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">4</td>
<td style="vertical-align: top; text-align: left">DB01365</td>
<td style="vertical-align: top; text-align: left">30</td>
<td style="vertical-align: top; text-align: left">DB00191</td>
<td style="vertical-align: top; text-align: left"><bold>0.94</bold></td>
<td style="vertical-align: top; text-align: left">DB00191</td>
<td style="vertical-align: top; text-align: left">0.93</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">5</td>
<td style="vertical-align: top; text-align: left">DB00380</td>
<td style="vertical-align: top; text-align: left">35</td>
<td style="vertical-align: top; text-align: left">DB01041</td>
<td style="vertical-align: top; text-align: left">0.85</td>
<td style="vertical-align: top; text-align: left">DB01041</td>
<td style="vertical-align: top; text-align: left">0.85</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">6</td>
<td style="vertical-align: top; text-align: left">DB06216</td>
<td style="vertical-align: top; text-align: left">37</td>
<td style="vertical-align: top; text-align: left">DB00370</td>
<td style="vertical-align: top; text-align: left">0.88</td>
<td style="vertical-align: top; text-align: left">DB00370</td>
<td style="vertical-align: top; text-align: left">0.88</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">7</td>
<td style="vertical-align: top; text-align: left">DB00693</td>
<td style="vertical-align: top; text-align: left">37</td>
<td style="vertical-align: top; text-align: left">DB01619</td>
<td style="vertical-align: top; text-align: left">0.86</td>
<td style="vertical-align: top; text-align: left">DB01619</td>
<td style="vertical-align: top; text-align: left">0.86</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">8</td>
<td style="vertical-align: top; text-align: left">DB07615</td>
<td style="vertical-align: top; text-align: left">40</td>
<td style="vertical-align: top; text-align: left">DB00721</td>
<td style="vertical-align: top; text-align: left">0.79</td>
<td style="vertical-align: top; text-align: left">DB00721</td>
<td style="vertical-align: top; text-align: left">0.79</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">9</td>
<td style="vertical-align: top; text-align: left">DB09219</td>
<td style="vertical-align: top; text-align: left">40</td>
<td style="vertical-align: top; text-align: left">DB01320</td>
<td style="vertical-align: top; text-align: left">0.85</td>
<td style="vertical-align: top; text-align: left">DB01320</td>
<td style="vertical-align: top; text-align: left">0.85</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">10</td>
<td style="vertical-align: top; text-align: left">DB00674</td>
<td style="vertical-align: top; text-align: left">42</td>
<td style="vertical-align: top; text-align: left">DB01619</td>
<td style="vertical-align: top; text-align: left">0.80</td>
<td style="vertical-align: top; text-align: left">DB01619</td>
<td style="vertical-align: top; text-align: left">0.80</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">11</td>
<td style="vertical-align: top; text-align: left">DB01198</td>
<td style="vertical-align: top; text-align: left">45</td>
<td style="vertical-align: top; text-align: left">DB00402</td>
<td style="vertical-align: top; text-align: left">0.89</td>
<td style="vertical-align: top; text-align: left">DB00402</td>
<td style="vertical-align: top; text-align: left">0.89</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">12</td>
<td style="vertical-align: top; text-align: left">DB00887</td>
<td style="vertical-align: top; text-align: left">45</td>
<td style="vertical-align: top; text-align: left">DB00837</td>
<td style="vertical-align: top; text-align: left">0.74</td>
<td style="vertical-align: top; text-align: left">DB00837</td>
<td style="vertical-align: top; text-align: left">0.74</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">13</td>
<td style="vertical-align: top; text-align: left">DB00246</td>
<td style="vertical-align: top; text-align: left">50</td>
<td style="vertical-align: top; text-align: left">DB01261</td>
<td style="vertical-align: top; text-align: left"><bold>0.76</bold></td>
<td style="vertical-align: top; text-align: left">DB01261</td>
<td style="vertical-align: top; text-align: left">0.75</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">14</td>
<td style="vertical-align: top; text-align: left">DB00381</td>
<td style="vertical-align: top; text-align: left">53</td>
<td style="vertical-align: top; text-align: left">DB01023</td>
<td style="vertical-align: top; text-align: left">0.83</td>
<td style="vertical-align: top; text-align: left">DB01023</td>
<td style="vertical-align: top; text-align: left">0.83</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">15</td>
<td style="vertical-align: top; text-align: left">DB09237</td>
<td style="vertical-align: top; text-align: left">54</td>
<td style="vertical-align: top; text-align: left">DB01054</td>
<td style="vertical-align: top; text-align: left">0.75</td>
<td style="vertical-align: top; text-align: left">DB01054</td>
<td style="vertical-align: top; text-align: left">0.75</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">16</td>
<td style="vertical-align: top; text-align: left">DB00876</td>
<td style="vertical-align: top; text-align: left">54</td>
<td style="vertical-align: top; text-align: left">DB09039</td>
<td style="vertical-align: top; text-align: left">0.67</td>
<td style="vertical-align: top; text-align: left">DB09039</td>
<td style="vertical-align: top; text-align: left">0.67</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">17</td>
<td style="vertical-align: top; text-align: left">DB00254</td>
<td style="vertical-align: top; text-align: left">55</td>
<td style="vertical-align: top; text-align: left">DB00595</td>
<td style="vertical-align: top; text-align: left">0.85</td>
<td style="vertical-align: top; text-align: left">DB00595</td>
<td style="vertical-align: top; text-align: left">0.85</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">18</td>
<td style="vertical-align: top; text-align: left">DB00351</td>
<td style="vertical-align: top; text-align: left">57</td>
<td style="vertical-align: top; text-align: left">DB04839</td>
<td style="vertical-align: top; text-align: left">0.93</td>
<td style="vertical-align: top; text-align: left">DB04839</td>
<td style="vertical-align: top; text-align: left">0.93</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">19</td>
<td style="vertical-align: top; text-align: left">DB01196</td>
<td style="vertical-align: top; text-align: left">60</td>
<td style="vertical-align: top; text-align: left">DB00286</td>
<td style="vertical-align: top; text-align: left">0.80</td>
<td style="vertical-align: top; text-align: left">DB00286</td>
<td style="vertical-align: top; text-align: left">0.80</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">20</td>
<td style="vertical-align: top; text-align: left">DB01621</td>
<td style="vertical-align: top; text-align: left">66</td>
<td style="vertical-align: top; text-align: left">DB01148</td>
<td style="vertical-align: top; text-align: left"><bold>0.72</bold></td>
<td style="vertical-align: top; text-align: left">DB01148</td>
<td style="vertical-align: top; text-align: left">0.71</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">21</td>
<td style="vertical-align: top; text-align: left">DB09236</td>
<td style="vertical-align: top; text-align: left">66</td>
<td style="vertical-align: top; text-align: left">DB01054</td>
<td style="vertical-align: top; text-align: left">0.68</td>
<td style="vertical-align: top; text-align: left">DB01054</td>
<td style="vertical-align: top; text-align: left">0.68</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">22</td>
<td style="vertical-align: top; text-align: left">DB08903</td>
<td style="vertical-align: top; text-align: left">69</td>
<td style="vertical-align: top; text-align: left">DB00333</td>
<td style="vertical-align: top; text-align: left">0.68</td>
<td style="vertical-align: top; text-align: left">DB00333</td>
<td style="vertical-align: top; text-align: left">0.68</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">23</td>
<td style="vertical-align: top; text-align: left">DB00632</td>
<td style="vertical-align: top; text-align: left">69</td>
<td style="vertical-align: top; text-align: left">DB00464</td>
<td style="vertical-align: top; text-align: left">0.74</td>
<td style="vertical-align: top; text-align: left">DB00464</td>
<td style="vertical-align: top; text-align: left">0.74</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">24</td>
<td style="vertical-align: top; text-align: left">DB01419</td>
<td style="vertical-align: top; text-align: left">70</td>
<td style="vertical-align: top; text-align: left">DB06605</td>
<td style="vertical-align: top; text-align: left">0.67</td>
<td style="vertical-align: top; text-align: left">DB06605</td>
<td style="vertical-align: top; text-align: left">0.67</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">25</td>
<td style="vertical-align: top; text-align: left">DB00320</td>
<td style="vertical-align: top; text-align: left">80</td>
<td style="vertical-align: top; text-align: left">DB00728</td>
<td style="vertical-align: top; text-align: left">0.62</td>
<td style="vertical-align: top; text-align: left">DB00728</td>
<td style="vertical-align: top; text-align: left">0.62</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">26</td>
<td style="vertical-align: top; text-align: left">DB00728</td>
<td style="vertical-align: top; text-align: left">91</td>
<td style="vertical-align: top; text-align: left">DB01339</td>
<td style="vertical-align: top; text-align: left">0.84</td>
<td style="vertical-align: top; text-align: left">DB01339</td>
<td style="vertical-align: top; text-align: left">0.84</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">27</td>
<td style="vertical-align: top; text-align: left">DB00503</td>
<td style="vertical-align: top; text-align: left">98</td>
<td style="vertical-align: top; text-align: left">DB00701</td>
<td style="vertical-align: top; text-align: left">0.54</td>
<td style="vertical-align: top; text-align: left"><bold>DB01336</bold></td>
<td style="vertical-align: top; text-align: left">0.54</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">28</td>
<td style="vertical-align: top; text-align: left">DB01232</td>
<td style="vertical-align: top; text-align: left">100</td>
<td style="vertical-align: top; text-align: left">DB00212</td>
<td style="vertical-align: top; text-align: left">0.62</td>
<td style="vertical-align: top; text-align: left">DB00212</td>
<td style="vertical-align: top; text-align: left">0.62</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">29</td>
<td style="vertical-align: top; text-align: left">DB00309</td>
<td style="vertical-align: top; text-align: left">110</td>
<td style="vertical-align: top; text-align: left">DB00541</td>
<td style="vertical-align: top; text-align: left">0.62</td>
<td style="vertical-align: top; text-align: left">DB00541</td>
<td style="vertical-align: top; text-align: left">0.62</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">30</td>
<td style="vertical-align: top; text-align: left">DB04786</td>
<td style="vertical-align: top; text-align: left">120</td>
<td style="vertical-align: top; text-align: left">DB00511</td>
<td style="vertical-align: top; text-align: left">0.43</td>
<td style="vertical-align: top; text-align: left">DB00511</td>
<td style="vertical-align: top; text-align: left">0.43</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">31</td>
<td style="vertical-align: top; text-align: left">DB09114</td>
<td style="vertical-align: top; text-align: left">130</td>
<td style="vertical-align: top; text-align: left"><sup>∗</sup>DB08993<sup>∗</sup> </td>
<td style="vertical-align: top; text-align: left"><sup>∗</sup>0.51<sup>∗</sup></td>
<td style="vertical-align: top; text-align: left"><bold>DB01321</bold></td>
<td style="vertical-align: top; text-align: left"><bold>0.52</bold></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">32</td>
<td style="vertical-align: top; text-align: left">DB06439</td>
<td style="vertical-align: top; text-align: left">137</td>
<td style="vertical-align: top; text-align: left">DB00207</td>
<td style="vertical-align: top; text-align: left">0.59</td>
<td style="vertical-align: top; text-align: left">DB00207</td>
<td style="vertical-align: top; text-align: left">0.59</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">33</td>
<td style="vertical-align: top; text-align: left">DB01078</td>
<td style="vertical-align: top; text-align: left">140</td>
<td style="vertical-align: top; text-align: left">DB00511</td>
<td style="vertical-align: top; text-align: left">0.58</td>
<td style="vertical-align: top; text-align: left"><bold>DB00390</bold></td>
<td style="vertical-align: top; text-align: left">0.58</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">34</td>
<td style="vertical-align: top; text-align: left">DB01590</td>
<td style="vertical-align: top; text-align: left">151</td>
<td style="vertical-align: top; text-align: left">DB00877</td>
<td style="vertical-align: top; text-align: left">0.56</td>
<td style="vertical-align: top; text-align: left">DB00877</td>
<td style="vertical-align: top; text-align: left">0.56</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">35</td>
<td style="vertical-align: top; text-align: left">DB04894</td>
<td style="vertical-align: top; text-align: left">152</td>
<td style="vertical-align: top; text-align: left">DB00646</td>
<td style="vertical-align: top; text-align: left">0.54</td>
<td style="vertical-align: top; text-align: left">DB00646</td>
<td style="vertical-align: top; text-align: left">0.54</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">36</td>
<td style="vertical-align: top; text-align: left">DB00403</td>
<td style="vertical-align: top; text-align: left">167</td>
<td style="vertical-align: top; text-align: left">DB08874</td>
<td style="vertical-align: top; text-align: left">0.47</td>
<td style="vertical-align: top; text-align: left">DB08874</td>
<td style="vertical-align: top; text-align: left">0.47</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">37</td>
<td style="vertical-align: top; text-align: left">DB00732</td>
<td style="vertical-align: top; text-align: left">169</td>
<td style="vertical-align: top; text-align: left">DB06287</td>
<td style="vertical-align: top; text-align: left">0.48</td>
<td style="vertical-align: top; text-align: left">DB06287</td>
<td style="vertical-align: top; text-align: left">0.48</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">38</td>
<td style="vertical-align: top; text-align: left">DB00050</td>
<td style="vertical-align: top; text-align: left">194</td>
<td style="vertical-align: top; text-align: left">DB00569</td>
<td style="vertical-align: top; text-align: left">0.49</td>
<td style="vertical-align: top; text-align: left">DB00569</td>
<td style="vertical-align: top; text-align: left">0.49</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">39</td>
<td style="vertical-align: top; text-align: left">DB06699</td>
<td style="vertical-align: top; text-align: left">221</td>
<td style="vertical-align: top; text-align: left">DB09099</td>
<td style="vertical-align: top; text-align: left">0.51</td>
<td style="vertical-align: top; text-align: left">DB09099</td>
<td style="vertical-align: top; text-align: left">0.51</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">40</td>
<td style="vertical-align: top; text-align: left">DB06219</td>
<td style="vertical-align: top; text-align: left">229</td>
<td style="vertical-align: top; text-align: left">DB00512</td>
<td style="vertical-align: top; text-align: left">0.44</td>
<td style="vertical-align: top; text-align: left"><bold>DB06287</bold></td>
<td style="vertical-align: top; text-align: left">0.44</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Mean:</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">–</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">86</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">–</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">0.70</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">–</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">0.70</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Table <xref rid="j_infor535_tab_001">1</xref> contains the results achieved for the described test. The first column numbers every test for ease of reference. The second column shows the name of the query, i.e. the reference molecule. The third one includes its number of atoms (considering those of hydrogen). The fourth column displays the most similar compound found by OptiPharm and 2L-GO-Pharm for every case. It is followed by the approximated value of the objective function that they found in the fifth column. This representation assumes that OptiPharm and 2L-GO-Pharm behave equally to save room, and that is true in 39 of the 40 cases. However, the result that the latter finds for the thirty-first case is the same as our proposal in reality. The asterisks warn the reader about this detail. Analogously, the sixth and seventh columns show the most similar compound suggested by Tangram CW and its value in the position achieved at optimization, respectively. The last row includes the average of the number of atoms and the value of the results found by the reference optimizers and Tangram CW in the corresponding columns. The values in bold font highlight either a relative victory of a method over the other or an interesting situation, and they are commented below.</p>
<p>For the sake of completeness, notice that it took approximately 9 hours to complete the test in the workstation, running in parallel in four cores and without compound filtering. Regardless, as run times are machine-dependent, the focus will stay on function evaluations.</p>
<p>At first glance, the reference optimizers and Tangram CW find the same results in most cases, covering both the suggested compound and the assessment. In five cases, i.e. 1, 27, 31 (related to OptiPharm only), 33, and 40, Tangram CW suggested different yet equally-ranked compounds. This situation, promoted when changing the methods, is interesting as it might catch the attention of analysts over new compounds for later stages of experimentation. Regardless, as mentioned, most records in Table <xref rid="j_infor535_tab_001">1</xref> are the same on either side. There are only four numerical variations in bold (4, 13, 20 (left), and 31 (right)), and they are negligible in this context, with the resulting averages also being equal. Accordingly, both sides are equivalent in practical terms.</p>
<p>Nevertheless, it is necessary to remember that the computational budget of Tangram CW is approximately a tenth of that of the reference methods. More specifically, our optimizer completes the benchmark after consuming <inline-formula id="j_infor535_ineq_059"><alternatives><mml:math>
<mml:mn>20</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>000</mml:mn>
<mml:mo>×</mml:mo>
<mml:mn>40</mml:mn>
<mml:mo>×</mml:mo>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>751</mml:mn>
<mml:mo>−</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[$20\hspace{0.1667em}000\times 40\times (1\hspace{0.1667em}751-1)$]]></tex-math></alternatives></inline-formula> function evaluations (the subtraction is due to the self-exclusion of compounds). Conversely, replacing the first term of that expression with either 200 000 (OptiPharm) or 150 000 (2L-GO-Pharm) significantly increases the computational cost. Additionally, as OptiPharm stands out as a highly-configurable method compatible with tighter computational budgets, it was also executed with the same limit as Tangram CW, i.e. 20 000 function evaluations. This configuration resulted in OptiPharm suggesting sub-optimal compounds in 10 out of the 40 cases, i.e. its failure rate rose from 0 to 25%.</p>
<p>Therefore, as intended, the results confirm that the proposed optimizer is significantly more efficient than the previous global optimization approaches for shape similarity-based screening, i.e. OptiPharm and 2L-GO-Pharm. This aspect is critical when the databases increase in size, as when considering flexibility. Aside from that, our optimizer is also simpler to implement and tune.</p>
</sec>
<sec id="j_infor535_s_021">
<label>3.4</label>
<title>Preliminary Compound Filtering</title>
<p>The aforementioned consumption of function evaluations for the benchmark of rigid compounds has two variable terms, the computational budget per positioning case and the number of compounds in the database. Tangram CW has already made it possible to change the former from either 200k or 150k to 20k only. However, the proposed compound filter can also help us to reduce the latter by minimizing the number of compounds that pass to a complete optimization-based positioning process. Some readers might consider 1 751 low enough, but it is only a benchmark. Other databases can increase the number of options significantly. Besides, the chosen benchmark will increase in size from 1 751 to 279 756 after considering flexibility through the generation of conformations, as covered in the next section. Therefore, it is highly relevant to be able to reduce the number of compounds to consider during the virtual screening process.</p>
<p>In this context, the knowledge-based filtering strategy has been tested for the previous benchmark in its two main configurations. Specifically, the filter has been launched to reduce the number of rigid compounds from the FDA database i) considering a degradation threshold with respect to the best ranked and ii) directly selecting a user-given number of the best. Table <xref rid="j_infor535_tab_002">2</xref> contains the results obtained. The first three columns refer to fixed selections, i.e. the filtering parameter <inline-formula id="j_infor535_ineq_060"><alternatives><mml:math>
<mml:mtext mathvariant="italic">qnt</mml:mtext></mml:math><tex-math><![CDATA[$\textit{qnt}$]]></tex-math></alternatives></inline-formula> is an integer greater than 1. The options considered are keeping the 100, 250, and 500 most promising compounds after the preliminary assessment considering four predefined positioning vectors. Analogously, the last three columns refer to the other approach, when <inline-formula id="j_infor535_ineq_061"><alternatives><mml:math>
<mml:mtext mathvariant="italic">qnt</mml:mtext></mml:math><tex-math><![CDATA[$\textit{qnt}$]]></tex-math></alternatives></inline-formula> is a decimal value between 0 and 1 linked to a degradation percentage from the best preliminary ranked. The first row shows the success rate considering the 40 cases. Every case is tagged as successful when the best-ranked compound known from the previous section passes the filter and will be among the options seen by the optimizer. Otherwise, it is tagged as failed, as optimizers will not be able to consider the preferred compound. The second row displays the average number of compounds left after filtering.</p>
<table-wrap id="j_infor535_tab_002">
<label>Table 2</label>
<caption>
<p>Effect of compound filtering over the rigid dataset.</p>
</caption>
<table>
<thead>
<tr>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin"/>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin"><inline-formula id="j_infor535_ineq_062"><alternatives><mml:math>
<mml:mtext mathvariant="italic">qnt</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>100</mml:mn></mml:math><tex-math><![CDATA[$\textit{qnt}=100$]]></tex-math></alternatives></inline-formula></td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin"><inline-formula id="j_infor535_ineq_063"><alternatives><mml:math>
<mml:mtext mathvariant="italic">qnt</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>250</mml:mn></mml:math><tex-math><![CDATA[$\textit{qnt}=250$]]></tex-math></alternatives></inline-formula></td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin"><inline-formula id="j_infor535_ineq_064"><alternatives><mml:math>
<mml:mtext mathvariant="italic">qnt</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>500</mml:mn></mml:math><tex-math><![CDATA[$\textit{qnt}=500$]]></tex-math></alternatives></inline-formula></td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin"><inline-formula id="j_infor535_ineq_065"><alternatives><mml:math>
<mml:mtext mathvariant="italic">qnt</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>0.25</mml:mn></mml:math><tex-math><![CDATA[$\textit{qnt}=0.25$]]></tex-math></alternatives></inline-formula></td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin"><inline-formula id="j_infor535_ineq_066"><alternatives><mml:math>
<mml:mtext mathvariant="italic">qnt</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>0.30</mml:mn></mml:math><tex-math><![CDATA[$\textit{qnt}=0.30$]]></tex-math></alternatives></inline-formula></td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin"><inline-formula id="j_infor535_ineq_067"><alternatives><mml:math>
<mml:mtext mathvariant="italic">qnt</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>0.35</mml:mn></mml:math><tex-math><![CDATA[$\textit{qnt}=0.35$]]></tex-math></alternatives></inline-formula></td>
</tr>
</thead>
<tbody>
<tr>
<td style="vertical-align: top; text-align: left">Success rate</td>
<td style="vertical-align: top; text-align: left">75%</td>
<td style="vertical-align: top; text-align: left">85%</td>
<td style="vertical-align: top; text-align: left">92.5%</td>
<td style="vertical-align: top; text-align: left">82.5%</td>
<td style="vertical-align: top; text-align: left">92.5%</td>
<td style="vertical-align: top; text-align: left">100%</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left"/>
<td style="vertical-align: top; text-align: left">(30/40)</td>
<td style="vertical-align: top; text-align: left">(34/40)</td>
<td style="vertical-align: top; text-align: left">(37/40)</td>
<td style="vertical-align: top; text-align: left">(33/40)</td>
<td style="vertical-align: top; text-align: left">(37/40)</td>
<td style="vertical-align: top; text-align: left">(40/40)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Ave. no. of compounds left</td>
<td style="vertical-align: top; text-align: left">100</td>
<td style="vertical-align: top; text-align: left">250</td>
<td style="vertical-align: top; text-align: left">500</td>
<td style="vertical-align: top; text-align: left">205</td>
<td style="vertical-align: top; text-align: left">348</td>
<td style="vertical-align: top; text-align: left">522</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin"/>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">(5.71%)</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">(14.28%)</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">(28.56%)</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">(11.71%)</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">(19.87%)</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">(29.81%)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>It is possible to achieve high success rates despite discarding multiple compounds before optimization. Even if one keeps the 100 most promising compounds, i.e. the results of the first column, the expected compound passes the filter in 75% of the cases, and this is the most aggressive filtering configuration considered. Let us study the resulting computational effort using this configuration compared to the previous study that omitted filtering. Without a filter, the number of function evaluations taken by our proposal to obtain the results of every case, i.e. every row of Table <xref rid="j_infor535_tab_001">1</xref>, is <inline-formula id="j_infor535_ineq_068"><alternatives><mml:math>
<mml:mn>20</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>000</mml:mn>
<mml:mo>×</mml:mo>
<mml:mn>1</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>750</mml:mn>
<mml:mo>=</mml:mo>
<mml:mn>35</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>000</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>000</mml:mn></mml:math><tex-math><![CDATA[$20\hspace{0.1667em}000\times 1\hspace{0.1667em}750=35\hspace{0.1667em}000\hspace{0.1667em}000$]]></tex-math></alternatives></inline-formula>. Completing the benchmark increases this value to <inline-formula id="j_infor535_ineq_069"><alternatives><mml:math>
<mml:mn>40</mml:mn>
<mml:mo>×</mml:mo>
<mml:mn>35</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>000</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>000</mml:mn>
<mml:mo>=</mml:mo>
<mml:mn>1.400</mml:mn>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mn>9</mml:mn></mml:math><tex-math><![CDATA[$40\times 35\hspace{0.1667em}000\hspace{0.1667em}000=1.400\mathrm{e}9$]]></tex-math></alternatives></inline-formula>. Conversely, using the <inline-formula id="j_infor535_ineq_070"><alternatives><mml:math>
<mml:mtext mathvariant="italic">qnt</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>100</mml:mn></mml:math><tex-math><![CDATA[$\textit{qnt}=100$]]></tex-math></alternatives></inline-formula> four-point filtering lowers the per-case consumption to <inline-formula id="j_infor535_ineq_071"><alternatives><mml:math>
<mml:mn>4</mml:mn>
<mml:mo>×</mml:mo>
<mml:mn>1</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>750</mml:mn>
<mml:mo>+</mml:mo>
<mml:mn>20</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>000</mml:mn>
<mml:mo>×</mml:mo>
<mml:mn>100</mml:mn>
<mml:mo>=</mml:mo>
<mml:mn>2</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>007</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>000</mml:mn></mml:math><tex-math><![CDATA[$4\times 1\hspace{0.1667em}750+20\hspace{0.1667em}000\times 100=2\hspace{0.1667em}007\hspace{0.1667em}000$]]></tex-math></alternatives></inline-formula> and <inline-formula id="j_infor535_ineq_072"><alternatives><mml:math>
<mml:mn>2</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>007</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>000</mml:mn>
<mml:mo>×</mml:mo>
<mml:mn>40</mml:mn>
<mml:mo>=</mml:mo>
<mml:mn>8.028</mml:mn>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mn>7</mml:mn></mml:math><tex-math><![CDATA[$2\hspace{0.1667em}007\hspace{0.1667em}000\times 40=8.028\mathrm{e}7$]]></tex-math></alternatives></inline-formula> for completing the 40 processes, i.e. 5.73% of the computational effort, as that of filtering is almost negligible if compared to optimization-based positioning.</p>
<p>Logically, limiting the selection to 100 implies renouncing the best result known in 25% of the cases in this context. Nevertheless, it is possible to find a trade-off. As expected, the success rates improve as the number of kept compounds increases. As shown in Table <xref rid="j_infor535_tab_002">2</xref>, it is possible not to discard any optimal compound and avoid optimizing the position of more than 70% of the database. Specifically, if the filter is set to keep approximately 522 of the most promising compounds, the success rate is the same as when considering the whole database, yet working with less than a third of it.</p>
<p>The number of compounds to keep can be defined either explicitly or implicitly. It depends on working with quantities or percentages, respectively, and it is possible to obtain comparable results. In this context, the reader might wonder about the option to choose. The recommended approach is to define an explicit quantity when it is critical to control the computational effort, as when working with conformations. Conversely, if the main goal is not to discard any promising compound whose ranking can improve after precise optimization-based positioning, degradation percentages should be preferred.</p>
</sec>
<sec id="j_infor535_s_022">
<label>3.5</label>
<title>Searches for Compounds Considering Flexibility</title>
<p>Based on the effectiveness of Tangram CW and the compound filter, the proposed solution has been applied to shape similarity-based screening considering flexible compounds. As mentioned above, this decision implies switching from the 1 751 initial compounds to their conformation-based extension containing 279 756.</p>
<p>This approach makes it possible to compare compounds more accurately and achieve better results, but the computational effort starts to be hard to handle. For instance, let us imagine that one user is interested in finding the most similar compound to one of the rigid ones in the current dataset. Roughly speaking, it would be necessary to study 279 755 options. Considering the use of Tangram CW with 20 000 objective evaluations per placement, the cost is <inline-formula id="j_infor535_ineq_073"><alternatives><mml:math>
<mml:mn>20</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>000</mml:mn>
<mml:mo>×</mml:mo>
<mml:mn>279</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>755</mml:mn>
<mml:mo>=</mml:mo>
<mml:mn>5.60</mml:mn>
<mml:mi mathvariant="normal">e</mml:mi>
<mml:mn>9</mml:mn></mml:math><tex-math><![CDATA[$20\hspace{0.1667em}000\times 279\hspace{0.1667em}755=5.60\mathrm{e}9$]]></tex-math></alternatives></inline-formula>. Assuming every compound to be relatively small, i.e. less than 30 atoms, and a computational cost of 0.006 seconds per evaluation in a regular computer, a single row of the rigid benchmark could take <inline-formula id="j_infor535_ineq_074"><alternatives><mml:math>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mn>0.006</mml:mn>
<mml:mo>×</mml:mo>
<mml:mn>20</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>000</mml:mn>
<mml:mo>×</mml:mo>
<mml:mn>279</mml:mn>
<mml:mspace width="0.1667em"/>
<mml:mn>755</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[$(0.006\times 20\hspace{0.1667em}000\times 279\hspace{0.1667em}755)$]]></tex-math></alternatives></inline-formula> seconds. This is more than 9 325 hours, i.e. more than a year. Logically, this cost can be attenuated with parallel computing, but the assumptions have also been favourable in terms of the size of the molecules and the run time per evaluation. Hence, every complete search is significantly demanding, even using Tangram CW and its effectiveness with 20k function evaluations. For these reasons, we have executed the search for five different cases of those already addressed without flexibility.</p>
<p>Table <xref rid="j_infor535_tab_003">3</xref> contains the results of using Tangram CW and the compound filter on the cluster node previously mentioned as one of the hardware resources. The first column shows the query or reference compound. After it, the second and third columns have the results found with a rigid-only approach, i.e. as shown in Table <xref rid="j_infor535_tab_001">1</xref> with either method, as all achieved the same optimal result. They are the most similar compound found and its value for the optimized positioning vector, respectively. After that, the fourth, fifth, and sixth columns contain the results of Tangram CW with a computational budget of 20k and compound filtering with a fixed quantity of 300 compounds. They are the query compound and the most similar one found, including the particular conformation between parentheses, its value, and the run time in hours, respectively. The same scheme is repeated for Tangram CW after changing the filter scheme to a degradation percentage of 0.35. The best values are in bold font.</p>
<table-wrap id="j_infor535_tab_003">
<label>Table 3</label>
<caption>
<p>Results of Tangram CW with 20k evaluations and filtering considering flexibility compared to using rigid compounds only.</p>
</caption>
<table>
<thead>
<tr>
<td style="vertical-align: middle; text-align: left; border-top: solid thin"/>
<td colspan="2" style="vertical-align: top; text-align: left; border-top: solid thin">Rigid-only approach</td>
<td colspan="3" style="vertical-align: top; text-align: left; border-top: solid thin">Tangram CW (20k) &amp; <inline-formula id="j_infor535_ineq_075"><alternatives><mml:math>
<mml:mtext mathvariant="italic">qnt</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>300</mml:mn></mml:math><tex-math><![CDATA[$\textit{qnt}=300$]]></tex-math></alternatives></inline-formula></td>
<td colspan="3" style="vertical-align: top; text-align: left; border-top: solid thin">Tangram CW (20k) &amp; <inline-formula id="j_infor535_ineq_076"><alternatives><mml:math>
<mml:mtext mathvariant="italic">qnt</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>0.35</mml:mn></mml:math><tex-math><![CDATA[$\textit{qnt}=0.35$]]></tex-math></alternatives></inline-formula></td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Target</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Found C.</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Value</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Found C.</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Value</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin"><italic>T</italic> (h)</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Found C.</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Value</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin"><italic>T</italic> (h)</td>
</tr>
</thead>
<tbody>
<tr>
<td style="vertical-align: top; text-align: left">DB00320</td>
<td style="vertical-align: top; text-align: left">DB00728</td>
<td style="vertical-align: top; text-align: left">0.62</td>
<td rowspan="1" style="vertical-align: middle; text-align: left"><bold>DB00320 (30)</bold> &amp; <bold>DB00696 (31)</bold></td>
<td style="vertical-align: top; text-align: left"><bold>0.96</bold></td>
<td style="vertical-align: top; text-align: left"><bold>0.32</bold></td>
<td rowspan="1" style="vertical-align: middle; text-align: left"><bold>DB00320 (30)</bold> &amp; <bold>DB00696 (31)</bold></td>
<td style="vertical-align: top; text-align: left"><bold>0.96</bold></td>
<td style="vertical-align: top; text-align: left">1.83</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">DB00331</td>
<td style="vertical-align: top; text-align: left">DB09210</td>
<td style="vertical-align: top; text-align: left">0.86</td>
<td rowspan="1" style="vertical-align: middle; text-align: left"><bold>DB00331 (11) &amp; DB00149 (7)</bold></td>
<td style="vertical-align: top; text-align: left"><bold>0.92</bold></td>
<td style="vertical-align: top; text-align: left"><bold>0.05</bold></td>
<td rowspan="1" style="vertical-align: middle; text-align: left"><bold>DB00331 (11) &amp; DB00149 (7)</bold></td>
<td style="vertical-align: top; text-align: left"><bold>0.92</bold></td>
<td style="vertical-align: top; text-align: left">0.18</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">DB00380</td>
<td style="vertical-align: top; text-align: left">DB01041</td>
<td style="vertical-align: top; text-align: left">0.85</td>
<td rowspan="1" style="vertical-align: middle; text-align: left"><bold>DB00380 (19) &amp; DB01579 (2)</bold></td>
<td style="vertical-align: top; text-align: left"><bold>0.90</bold></td>
<td style="vertical-align: top; text-align: left"><bold>0.10</bold></td>
<td rowspan="1" style="vertical-align: middle; text-align: left"><bold>DB00380 (19) &amp; DB01579 (2)</bold></td>
<td style="vertical-align: top; text-align: left"><bold>0.90</bold></td>
<td style="vertical-align: top; text-align: left">1.78</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">DB00381</td>
<td style="vertical-align: top; text-align: left">DB01023</td>
<td style="vertical-align: top; text-align: left">0.83</td>
<td rowspan="1" style="vertical-align: middle; text-align: left"><bold>DB00381 (141) &amp; DB04920 (204)</bold></td>
<td style="vertical-align: top; text-align: left"><bold>0.87</bold></td>
<td style="vertical-align: top; text-align: left"><bold>2.36</bold></td>
<td rowspan="1" style="vertical-align: middle; text-align: left"><bold>DB00381 (141) &amp; DB04920 (204)</bold></td>
<td style="vertical-align: top; text-align: left"><bold>0.87</bold></td>
<td style="vertical-align: top; text-align: left">69.93</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">DB00632</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">DB00464</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">0.74</td>
<td rowspan="1" style="vertical-align: middle; text-align: left; border-bottom: solid thin"><bold>DB00632 (93) &amp; DB09031 (442)</bold></td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin"><bold>0.89</bold></td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin"><bold>3.16</bold></td>
<td rowspan="1" style="vertical-align: middle; text-align: left; border-bottom: solid thin"><bold>DB00632 (93) &amp; DB09031 (442)</bold></td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin"><bold>0.89</bold></td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">27.56</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The benefits of considering flexibility in virtual screening are evident. The proposed solution outperforms the results obtained without flexibility in the five cases. It is not only a matter of positioning accuracy: the preferred compounds also change. For instance, the first record shows that when ignoring flexibility, the optimizers suggest selecting DB00728 as the most similar to DB00320. The proposal was reasonable for that dataset, and all the optimizers obtained the same result (see Table <xref rid="j_infor535_tab_001">1</xref>). However, when considering flexibility, the proposed compound is DB00696 (in its thirty-first conformation). Hence, it is not a limitation of the optimization engine but of the rigid-only approach. That said, as intended, the enhanced efficiency of the proposed method makes it feasible to address it with a more reasonable effort.</p>
<p>Focusing on the results of our proposal, i.e. the right side of Table <xref rid="j_infor535_tab_003">3</xref>, the run times confirm two aspects already mentioned. Firstly, comparing compounds may take significantly different run times depending on their number of atoms and the existing options. Secondly, and related to the latter aspect, fixed-size filtering minimizes the impact of this potential problem. For instance, the process for DB00381 as the query took 2.36 hours when <inline-formula id="j_infor535_ineq_077"><alternatives><mml:math>
<mml:mtext mathvariant="italic">qnt</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>300</mml:mn></mml:math><tex-math><![CDATA[$\textit{qnt}=300$]]></tex-math></alternatives></inline-formula>, but it rose to 69.93 hours when <inline-formula id="j_infor535_ineq_078"><alternatives><mml:math>
<mml:mtext mathvariant="italic">qnt</mml:mtext>
<mml:mo>=</mml:mo>
<mml:mn>0.35</mml:mn></mml:math><tex-math><![CDATA[$\textit{qnt}=0.35$]]></tex-math></alternatives></inline-formula>. Thus, fixed-size filtering remains the preferred option when dealing with the flexibility of compounds (conformations), as introduced. Besides, it is also relevant to highlight that both virtual screening approaches run independently. Despite this situation, the proposed method found the same compounds and assessments. Therefore, the shape similarity-based virtual screening method shows significant robustness.</p>
<p>Additionally, although it has not been highlighted due to its conceptual simplicity, notice that the common parallel database exploration component defines the backbone of the whole process. Considering that the computing platform has almost 100 cores to use, not exploiting it could multiply the run times by up to a factor of 100, which becomes critical in this context.</p>
</sec>
</sec>
<sec id="j_infor535_s_023">
<label>4</label>
<title>Conclusions and Future Work</title>
<p>In virtual screening for computer-aided drug discovery, shape similarity is one of the most used metrics. It requires finding the optimal comparison position between compounds, which is addressed as an optimization problem. Since compound databases are massive and this problem must be solved multiple times, local optimization algorithms are generally used. This strategy implies prioritizing computational speed over exploration capabilities. OptiPharm and 2L-GO-Pharm are two recent methods that apply global search strategies. However, their potentially high consumption of function evaluations and sophisticated tuning are their main deterring factors, especially for working with flexible molecules, as the potential computational cost increases dramatically.</p>
<p>This work has proposed and tested a stack for addressing shape similarity-based virtual screening. It has covered the design of a parallel process for exploring databases of compounds, a new global optimization algorithm, and a knowledge-based filter of compounds. The last two components represent the main contributions. The optimization algorithm, called Tangram CW, is based on a recent meta-heuristic known as Tangram, which needs few function evaluations and is simple to tune. It modifies the division of the search space to work with centroids rather than midpoints and allows defining variables that wrap around their bounds. The knowledge-based compound filter goes through the input database and ranks each compound after studying four descriptive positions. The user can decide with a single parameter whether to choose a fixed selection of the most promising or those whose initial value falls within a user-given degradation factor, which is self-adaptive. This filtering procedure only consumes four function evaluations per candidate compound and saves thousands for every discarded one.</p>
<p>The proposal has been first tested with a benchmark covering the shape similarity-based screening for 40 compounds from a database of 1 751. Tangram CW achieves comparable results to the state-of-the-art methods OptiPharm and 2L-GO-Pharm. However, their computational budget was 200 000 and 150 000 function evaluations, respectively, while Tangram CW needed 20 000. In this context, the compound filter was also tested. On average, it allowed discarding more than two thirds of the compounds while keeping the expected ones for the optimizer to find them. The rigor of filtering can be easily controlled for prioritizing either the computational cost or the quality of the results. It was also possible to reach success rates greater than 90% after ignoring four fifths of the compounds.</p>
<p>Based on these results, the combination of Tangram CW and the compound filter has been tested to perform virtual screening with flexible compounds. This goal required generating the conformations of those in the database, which enlarged the dataset from 1 751 compounds to 279 756. In this context, five cases from the previous benchmark were repeated. Thanks to the light consumption of function evaluations of Tangram CW, the compound filter, and the implicit support of the parallel exploration procedure, it was possible to finish all the cases in a reasonable time. Besides, the facts that the selected compounds differ from the rigid context and the values are significantly higher confirm that supporting flexibility is preferable over not doing so. The proposal of this work advances in this line by offering a simple-to-tune yet effective and efficient stack for virtual screening.</p>
<p>For future work, there are two lines to extend the present study. Firstly, more rigid and flexible benchmarks will be considered. Secondly and lastly, new virtual screening metrics will be added to assess the effectiveness of our proposal when considering a different objective function.</p>
</sec>
</body>
<back>
<ack id="j_infor535_ack_001">
<title>Acknowledgements</title>
<p>The authors would like to thank Professor Leocadio González Casado from the University of Almería for his suggestions about the original Tangram method.</p></ack>
<ref-list id="j_infor535_reflist_001">
<title>References</title>
<ref id="j_infor535_ref_001">
<mixed-citation publication-type="journal"><string-name><surname>Ahmed</surname>, <given-names>L.</given-names></string-name>, <string-name><surname>Georgiev</surname>, <given-names>V.</given-names></string-name>, <string-name><surname>Capuccini</surname>, <given-names>M.</given-names></string-name>, <string-name><surname>Toor</surname>, <given-names>S.</given-names></string-name>, <string-name><surname>Schaal</surname>, <given-names>W.</given-names></string-name>, <string-name><surname>Laure</surname>, <given-names>E.</given-names></string-name>, <string-name><surname>Spjuth</surname>, <given-names>O.</given-names></string-name> (<year>2018</year>). <article-title>Efficient iterative virtual screening with Apache Spark and conformal prediction</article-title>. <source>Journal of Cheminformatics</source>, <volume>10</volume>, <fpage>8</fpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1186/s13321-018-0265-z" xlink:type="simple">https://doi.org/10.1186/s13321-018-0265-z</ext-link>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_002">
<mixed-citation publication-type="journal"><string-name><surname>Ban</surname>, <given-names>F.</given-names></string-name>, <string-name><surname>Dalal</surname>, <given-names>K.</given-names></string-name>, <string-name><surname>Li</surname>, <given-names>H.</given-names></string-name>, <string-name><surname>LeBlanc</surname>, <given-names>E.</given-names></string-name>, <string-name><surname>Rennie</surname>, <given-names>P.S.</given-names></string-name>, <string-name><surname>Cherkasov</surname>, <given-names>A.</given-names></string-name> (<year>2017</year>). <article-title>Best practices of computer-aided drug discovery: lessons learned from the development of a preclinical candidate for prostate cancer with a new mechanism of action</article-title>. <source>Journal of Chemical Information and Modeling</source>, <volume>57</volume>, <fpage>1018</fpage>–<lpage>1028</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1021/acs.jcim.7b00137" xlink:type="simple">https://doi.org/10.1021/acs.jcim.7b00137</ext-link>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_003">
<mixed-citation publication-type="journal"><string-name><surname>Boussaïd</surname>, <given-names>I.</given-names></string-name>, <string-name><surname>Lepagnot</surname>, <given-names>J.</given-names></string-name>, <string-name><surname>Siarry</surname>, <given-names>P.</given-names></string-name> (<year>2013</year>). <article-title>A survey on optimization metaheuristics</article-title>. <source>Information Sciences</source>, <volume>237</volume>, <fpage>82</fpage>–<lpage>117</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_004">
<mixed-citation publication-type="journal"><string-name><surname>Carracedo-Reboredo</surname>, <given-names>P.</given-names></string-name>, <string-name><surname>Liñares-Blanco</surname>, <given-names>J.</given-names></string-name>, <string-name><surname>Rodríguez-Fernández</surname>, <given-names>N.</given-names></string-name>, <string-name><surname>Cedrón</surname>, <given-names>F.</given-names></string-name>, <string-name><surname>Novoa</surname>, <given-names>F.J.</given-names></string-name>, <string-name><surname>Carballal</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Maojo</surname>, <given-names>V.</given-names></string-name>, <string-name><surname>Pazos</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Fernández-Lozano</surname>, <given-names>C.</given-names></string-name> (<year>2021</year>). <article-title>A review on machine learning approaches and trends in drug discovery</article-title>. <source>Computational and Structural Biotechnology Journal</source>, <volume>19</volume>, <fpage>4538</fpage>–<lpage>4558</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_005">
<mixed-citation publication-type="journal"><string-name><surname>Cereto-Massagué</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Ojeda</surname>, <given-names>M.J.</given-names></string-name>, <string-name><surname>Valls</surname>, <given-names>C.</given-names></string-name>, <string-name><surname>Mulero</surname>, <given-names>M.</given-names></string-name>, <string-name><surname>Garcia-Vallvé</surname>, <given-names>S.</given-names></string-name>, <string-name><surname>Pujadas</surname>, <given-names>G.</given-names></string-name> (<year>2015</year>). <article-title>Molecular fingerprint similarity search in virtual screening</article-title>. <source>Methods</source>, <volume>71</volume>, <fpage>58</fpage>–<lpage>63</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_006">
<mixed-citation publication-type="journal"><string-name><surname>Ciociola</surname>, <given-names>A.A.</given-names></string-name>, <string-name><surname>Cohen</surname>, <given-names>L.B.</given-names></string-name>, <string-name><surname>Kulkarni</surname>, <given-names>P.</given-names></string-name>, <string-name><surname>Kefalas</surname>, <given-names>C.</given-names></string-name>, <string-name><surname>Buchman</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Burke</surname>, <given-names>C.</given-names></string-name>, <string-name><surname>Cain</surname>, <given-names>T.</given-names></string-name>, <string-name><surname>Connor</surname>, <given-names>J.</given-names></string-name>, <string-name><surname>Ehrenpreis</surname>, <given-names>E.D.</given-names></string-name>, <string-name><surname>Fang</surname>, <given-names>J.</given-names></string-name>, <string-name><surname>Fass</surname>, <given-names>R.</given-names></string-name>, <string-name><surname>Karlstadt</surname>, <given-names>R.</given-names></string-name>, <string-name><surname>Pambianco</surname>, <given-names>D.</given-names></string-name>, <string-name><surname>Phillips</surname>, <given-names>J.</given-names></string-name>, <string-name><surname>Pochapin</surname>, <given-names>M.</given-names></string-name>, <string-name><surname>Pockros</surname>, <given-names>P.</given-names></string-name>, <string-name><surname>Schoenfeld</surname>, <given-names>P.</given-names></string-name>, <string-name><surname>Vuppalanchi</surname>, <given-names>R.</given-names></string-name> (<year>2014</year>). <article-title>How drugs are developed and approved by the FDA: current process and future directions</article-title>. <source>American Journal of Gastroenterology</source>, <volume>109</volume>, <fpage>620</fpage>–<lpage>623</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_007">
<mixed-citation publication-type="journal"><string-name><surname>Costa</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Nannicini</surname>, <given-names>G.</given-names></string-name> (<year>2018</year>). <article-title>RBFOpt: an open-source library for black-box optimization with costly function evaluations</article-title>. <source>Mathematical Programming Computation</source>, <volume>10</volume>, <fpage>597</fpage>–<lpage>629</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_008">
<mixed-citation publication-type="journal"><string-name><surname>Cruz</surname>, <given-names>N.C.</given-names></string-name>, <string-name><surname>González-Redondo</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Redondo</surname>, <given-names>J.L.</given-names></string-name>, <string-name><surname>Garrido</surname>, <given-names>J.A.</given-names></string-name>, <string-name><surname>Ortigosa</surname>, <given-names>E.M.</given-names></string-name>, <string-name><surname>Ortigosa</surname>, <given-names>P.M.</given-names></string-name> (<year>2022</year>a). <article-title>Black-box and surrogate optimization for tuning spiking neural models of striatum plasticity</article-title>. <source>Frontiers in Neuroinformatics</source>, <volume>16</volume>, <elocation-id>1017222</elocation-id>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.3389/fninf.2022.1017222" xlink:type="simple">https://doi.org/10.3389/fninf.2022.1017222</ext-link>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_009">
<mixed-citation publication-type="chapter"><string-name><surname>Cruz</surname>, <given-names>N.C.</given-names></string-name>, <string-name><surname>Redondo</surname>, <given-names>J.L.</given-names></string-name>, <string-name><surname>Ortigosa</surname>, <given-names>E.M.</given-names></string-name>, <string-name><surname>Ortigosa</surname>, <given-names>P.M.</given-names></string-name> (<year>2022</year>b). <chapter-title>On the design of a new stochastic meta-heuristic for derivative-free optimization</chapter-title>. In: <source>Computational Science and Its Applications–ICCSA 2022 Workshops: Malaga, Spain, July 4–7, 2022, Proceedings, Part II</source>, pp. <fpage>188</fpage>–<lpage>200</lpage>. <publisher-name>Springer</publisher-name>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_010">
<mixed-citation publication-type="other"><string-name><surname>Cruz</surname>, <given-names>N.C.</given-names></string-name>, <string-name><surname>Puertas-Martín</surname>, <given-names>S.</given-names></string-name>, <string-name><surname>Redondo</surname>, <given-names>J.L.</given-names></string-name>, <string-name><surname>Ortigosa</surname>, <given-names>P.M.</given-names></string-name> (2023). Source code for ‘An effective solution for drug discovery based on the Tangram meta-heuristic and compound filtering’. <uri>https://github.com/cnelmortimer/Cruz_et_al-INFOR23_Code</uri>. Online: 27-Oct-2023.</mixed-citation>
</ref>
<ref id="j_infor535_ref_011">
<mixed-citation publication-type="journal"><string-name><surname>Ellingson</surname>, <given-names>B.A.</given-names></string-name>, <string-name><surname>Geballe</surname>, <given-names>M.T.</given-names></string-name>, <string-name><surname>Wlodek</surname>, <given-names>S.</given-names></string-name>, <string-name><surname>Bayly</surname>, <given-names>C.I.</given-names></string-name>, <string-name><surname>Skillman</surname>, <given-names>A.G.</given-names></string-name>, <string-name><surname>Nicholls</surname>, <given-names>A.</given-names></string-name> (<year>2014</year>). <article-title>Efficient calculation of SAMPL4 hydration free energies using OMEGA, SZYBKI, QUACPAC, and Zap TK</article-title>. <source>Journal of Computer-Aided Molecular Design</source>, <volume>28</volume>, <fpage>289</fpage>–<lpage>298</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_012">
<mixed-citation publication-type="journal"><string-name><surname>Ferrández</surname>, <given-names>M.R.</given-names></string-name>, <string-name><surname>Puertas-Martín</surname>, <given-names>S.</given-names></string-name>, <string-name><surname>Redondo</surname>, <given-names>J.L.</given-names></string-name>, <string-name><surname>Pérez-Sánchez</surname>, <given-names>H.</given-names></string-name>, <string-name><surname>Ortigosa</surname>, <given-names>P.M.</given-names></string-name> (<year>2022</year>). <article-title>A two-layer mono-objective algorithm based on guided optimization to reduce the computational cost in virtual screening</article-title>. <source>Scientific Reports</source>, <volume>12</volume>(<issue>1</issue>), <fpage>12769</fpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_013">
<mixed-citation publication-type="journal"><string-name><surname>Fu</surname>, <given-names>X.</given-names></string-name>, <string-name><surname>Mervin</surname>, <given-names>L.H.</given-names></string-name>, <string-name><surname>Li</surname>, <given-names>X.</given-names></string-name>, <string-name><surname>Yu</surname>, <given-names>H.</given-names></string-name>, <string-name><surname>Li</surname>, <given-names>J.</given-names></string-name>, <string-name><surname>Mohamad Zobir</surname>, <given-names>S.Z.</given-names></string-name>, <string-name><surname>Zoufir</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Zhou</surname>, <given-names>Y.</given-names></string-name>, <string-name><surname>Song</surname>, <given-names>Y.</given-names></string-name>, <string-name><surname>Wang</surname>, <given-names>Z.</given-names></string-name>, <string-name><surname>Bender</surname>, <given-names>A.</given-names></string-name> (<year>2017</year>). <article-title>Toward understanding the cold, hot, and neutral nature of Chinese medicines using in silico mode-of-action analysis</article-title>. <source>Journal of Chemical Information and Modeling</source>, <volume>57</volume>, <fpage>468</fpage>–<lpage>483</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_014">
<mixed-citation publication-type="journal"><string-name><surname>García</surname>, <given-names>J.S.</given-names></string-name>, <string-name><surname>Puertas-Martín</surname>, <given-names>S.</given-names></string-name>, <string-name><surname>Redondo</surname>, <given-names>J.L.</given-names></string-name>, <string-name><surname>Moreno</surname>, <given-names>J.J.</given-names></string-name>, <string-name><surname>Ortigosa</surname>, <given-names>P.M.</given-names></string-name> (<year>2023</year>). <article-title>Improving drug discovery through parallelism</article-title>. <source>Journal of Supercomputing</source>, <volume>79</volume>, <fpage>9538</fpage>–<lpage>9557</lpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1007/s11227-022-05014-0" xlink:type="simple">https://doi.org/10.1007/s11227-022-05014-0</ext-link>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_015">
<mixed-citation publication-type="other"><string-name><surname>Getreuer</surname>, <given-names>P.</given-names></string-name> (2010). <italic>Writing Matlab C/MEX code</italic>. Technical report, Matlab FileExchange.</mixed-citation>
</ref>
<ref id="j_infor535_ref_016">
<mixed-citation publication-type="journal"><string-name><surname>Hamza</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Wei</surname>, <given-names>N.N.</given-names></string-name>, <string-name><surname>Zhan</surname>, <given-names>C.G.</given-names></string-name> (<year>2012</year>). <article-title>Ligand-based virtual screening approach using a new scoring function</article-title>. <source>Journal of Chemical Information and Modeling</source>, <volume>52</volume>, <fpage>963</fpage>–<lpage>974</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_017">
<mixed-citation publication-type="journal"><string-name><surname>Hawkins</surname>, <given-names>P.C.D.</given-names></string-name>, <string-name><surname>Skillman</surname>, <given-names>A.G.</given-names></string-name>, <string-name><surname>Warren</surname>, <given-names>G.L.</given-names></string-name>, <string-name><surname>Ellingson</surname>, <given-names>B.A.</given-names></string-name>, <string-name><surname>Stahl</surname>, <given-names>M.T.</given-names></string-name> (<year>2010</year>). <article-title>Conformer generation with OMEGA: algorithm and validation using high quality structures from the protein databank and Cambridge structural database</article-title>. <source>Journal of Chemical Information and Modeling</source>, <volume>50</volume>, <fpage>572</fpage>–<lpage>584</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_018">
<mixed-citation publication-type="journal"><string-name><surname>Hughes</surname>, <given-names>J.P.</given-names></string-name>, <string-name><surname>Rees</surname>, <given-names>S.</given-names></string-name>, <string-name><surname>Kalindjian</surname>, <given-names>S.B.</given-names></string-name>, <string-name><surname>Philpott</surname>, <given-names>K.L.</given-names></string-name> (<year>2011</year>). <article-title>Principles of early drug discovery</article-title>. <source>British Journal of Pharmacology</source>, <volume>162</volume>, <fpage>1239</fpage>–<lpage>1249</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_019">
<mixed-citation publication-type="journal"><string-name><surname>Jelasity</surname>, <given-names>M.</given-names></string-name>, <string-name><surname>Ortigosa</surname>, <given-names>P.M.</given-names></string-name>, <string-name><surname>García</surname>, <given-names>I.</given-names></string-name> (<year>2001</year>). <article-title>UEGO, an abstract clustering technique for multimodal global optimization</article-title>. <source>Journal of Heuristics</source>, <volume>7</volume>(<issue>3</issue>), <fpage>215</fpage>–<lpage>233</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_020">
<mixed-citation publication-type="journal"><string-name><surname>Jones</surname>, <given-names>D.R.</given-names></string-name>, <string-name><surname>Martins</surname>, <given-names>J.R.R.A.</given-names></string-name> (<year>2021</year>). <article-title>The DIRECT algorithm: 25 years later</article-title>. <source>Journal of Global Optimization</source>, <volume>79</volume>(<issue>3</issue>), <fpage>521</fpage>–<lpage>566</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_021">
<mixed-citation publication-type="journal"><string-name><surname>Kanhed</surname>, <given-names>A.M.</given-names></string-name>, <string-name><surname>Patel</surname>, <given-names>D.V.</given-names></string-name>, <string-name><surname>Teli</surname>, <given-names>D.M.</given-names></string-name>, <string-name><surname>Patel</surname>, <given-names>N.R.</given-names></string-name>, <string-name><surname>Chhabria</surname>, <given-names>M.T.</given-names></string-name>, <string-name><surname>Yadav</surname>, <given-names>M.R.</given-names></string-name> (<year>2021</year>). <article-title>Identification of potential Mpro inhibitors for the treatment of COVID-19 by using systematic virtual screening approach</article-title>. <source>Molecular Diversity</source>, <volume>25</volume>(<issue>1</issue>), <fpage>383</fpage>–<lpage>401</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_022">
<mixed-citation publication-type="journal"><string-name><surname>Kumar</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Zhang</surname>, <given-names>K.Y.J.</given-names></string-name> (<year>2018</year>). <article-title>Advances in the development of shape similarity methods and their application in drug discovery</article-title>. <source>Frontiers in Chemistry</source>, <volume>6</volume>, <fpage>315</fpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.3389/fchem.2018.00315" xlink:type="simple">https://doi.org/10.3389/fchem.2018.00315</ext-link>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_023">
<mixed-citation publication-type="journal"><string-name><surname>Lančinskas</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Ortigosa</surname>, <given-names>P.M.</given-names></string-name>, <string-name><surname>Žilinskas</surname>, <given-names>J.</given-names></string-name> (<year>2013</year>). <article-title>Multi-objective single agent stochastic search in non-dominated sorting genetic algorithm</article-title>. <source>Nonlinear Analysis: Modelling and Control</source>, <volume>18</volume>(<issue>3</issue>), <fpage>293</fpage>–<lpage>313</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_024">
<mixed-citation publication-type="book"><string-name><surname>Lindfield</surname>, <given-names>G.</given-names></string-name>, <string-name><surname>Penny</surname>, <given-names>J.</given-names></string-name> (<year>2017</year>). <source>Introduction to Nature-Inspired Optimization</source>. <publisher-name>Academic Press</publisher-name>, <publisher-loc>London, UK</publisher-loc>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_025">
<mixed-citation publication-type="journal"><string-name><surname>Maia</surname>, <given-names>E.H.B.</given-names></string-name>, <string-name><surname>Assis</surname>, <given-names>L.C.</given-names></string-name>, <string-name><surname>De Oliveira</surname>, <given-names>T.A.</given-names></string-name>, <string-name><surname>Da Silva</surname>, <given-names>A.M.</given-names></string-name>, <string-name><surname>Taranto</surname>, <given-names>A.G.</given-names></string-name> (<year>2020</year>). <article-title>Structure-based virtual screening: from classical to artificial intelligence</article-title>. <source>Frontiers in Chemistry</source>, <volume>8</volume>, <fpage>343</fpage>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.3389/fchem.2020.00343" xlink:type="simple">https://doi.org/10.3389/fchem.2020.00343</ext-link>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_026">
<mixed-citation publication-type="book"><string-name><surname>MATLAB</surname></string-name> (<year>2018</year>). <source>Version R2018b (MATLAB 9.5)</source>. <publisher-name>The MathWorks Inc.</publisher-name>, <publisher-loc>Natick, Massachusetts</publisher-loc>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_027">
<mixed-citation publication-type="journal"><string-name><surname>McInnes</surname>, <given-names>C.</given-names></string-name> (<year>2007</year>). <article-title>Virtual screening strategies in drug discovery</article-title>. <source>Current Opinion in Chemical Biology</source>, <volume>11</volume>, <fpage>494</fpage>–<lpage>502</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_028">
<mixed-citation publication-type="journal"><string-name><surname>Meissner</surname>, <given-names>K.A.</given-names></string-name>, <string-name><surname>Kronenberger</surname>, <given-names>T.</given-names></string-name>, <string-name><surname>Maltarollo</surname>, <given-names>V.G.</given-names></string-name>, <string-name><surname>Trossini</surname>, <given-names>G.H.G.</given-names></string-name>, <string-name><surname>Wrenger</surname>, <given-names>C.</given-names></string-name> (<year>2019</year>). <article-title>Targeting the Plasmodium falciparum plasmepsin V by ligand-based virtual screening</article-title>. <source>Chemical Biology &amp; Drug Design</source>, <volume>93</volume>, <fpage>300</fpage>–<lpage>312</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_029">
<mixed-citation publication-type="journal"><string-name><surname>Parois</surname>, <given-names>P.</given-names></string-name>, <string-name><surname>Cooper</surname>, <given-names>R.I.</given-names></string-name>, <string-name><surname>Thompson</surname>, <given-names>A.L.</given-names></string-name> (<year>2015</year>). <article-title>Crystal structures of increasingly large molecules: meeting the challenges with CRYSTALS software</article-title>. <source>Chemistry Central Journal</source>, <volume>9</volume>, <fpage>30</fpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_030">
<mixed-citation publication-type="journal"><string-name><surname>Poongavanam</surname>, <given-names>V.</given-names></string-name>, <string-name><surname>Atilaw</surname>, <given-names>Y.</given-names></string-name>, <string-name><surname>Ye</surname>, <given-names>S.</given-names></string-name>, <string-name><surname>Wieske</surname>, <given-names>L.H.E.</given-names></string-name>, <string-name><surname>Erdelyi</surname>, <given-names>M.</given-names></string-name>, <string-name><surname>Ermondi</surname>, <given-names>G.</given-names></string-name>, <string-name><surname>Caron</surname>, <given-names>G.</given-names></string-name>, <string-name><surname>Kihlberg</surname>, <given-names>J.</given-names></string-name> (<year>2021</year>). <article-title>Predicting the permeability of macrocycles from conformational sampling – limitations of molecular flexibility</article-title>. <source>Journal of Pharmaceutical Sciences</source>, <volume>110</volume>, <fpage>301</fpage>–<lpage>313</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_031">
<mixed-citation publication-type="journal"><string-name><surname>Puertas-Martín</surname>, <given-names>S.</given-names></string-name>, <string-name><surname>Redondo</surname>, <given-names>J.L.</given-names></string-name>, <string-name><surname>Ortigosa</surname>, <given-names>P.M.</given-names></string-name>, <string-name><surname>Pérez-Sánchez</surname>, <given-names>H.</given-names></string-name> (<year>2019</year>). <article-title>OptiPharm: an evolutionary algorithm to compare shape similarity</article-title>. <source>Scientific Reports</source>, <volume>9</volume>(<issue>1</issue>), <fpage>1</fpage>–<lpage>24</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_032">
<mixed-citation publication-type="chapter"><string-name><surname>Puertas-Martín</surname>, <given-names>S.</given-names></string-name>, <string-name><surname>Redondo</surname>, <given-names>J.L.</given-names></string-name>, <string-name><surname>Garzón</surname>, <given-names>E.M.</given-names></string-name>, <string-name><surname>Pérez-Sánchez</surname>, <given-names>H.</given-names></string-name>, <string-name><surname>Ortigosa</surname>, <given-names>P.M.</given-names></string-name> (<year>2022</year>). <chapter-title>Increasing the accuracy of OptiPharm’s virtual screening predictions by implementing molecular flexibility</chapter-title>. In: <source>Bioinformatics and Biomedical Engineering, IWBBIO 2022</source>, <series><italic>Lecture Notes in Computer Science</italic></series>, Vol. <volume>13347</volume>. <publisher-name>Springer</publisher-name>, <publisher-loc>Cham</publisher-loc>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1007/978-3-031-07802-6_20" xlink:type="simple">https://doi.org/10.1007/978-3-031-07802-6_20</ext-link>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_033">
<mixed-citation publication-type="journal"><string-name><surname>Rao</surname>, <given-names>R.V.</given-names></string-name>, <string-name><surname>Savsani</surname>, <given-names>V.J.</given-names></string-name>, <string-name><surname>Vakharia</surname>, <given-names>D.P.</given-names></string-name> (<year>2012</year>). <article-title>Teaching–learning-based optimization: an optimization method for continuous non-linear large scale problems</article-title>. <source>Information Sciences</source>, <volume>183</volume>(<issue>1</issue>), <fpage>1</fpage>–<lpage>15</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_034">
<mixed-citation publication-type="book"><string-name><surname>Rapaport</surname>, <given-names>D.C.</given-names></string-name> (<year>2004</year>). <source>The Art of Molecular Dynamics Simulation</source>. <publisher-name>Cambridge University Press</publisher-name>, <publisher-loc>Cambridge, UK</publisher-loc>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_035">
<mixed-citation publication-type="journal"><string-name><surname>Rogers</surname>, <given-names>D.J.</given-names></string-name>, <string-name><surname>Tanimoto</surname>, <given-names>T.T.</given-names></string-name> (<year>1960</year>). <article-title>A computer program for classifying plants</article-title>. <source>Science</source>, <volume>132</volume>, <fpage>1115</fpage>–<lpage>1118</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_036">
<mixed-citation publication-type="book"><string-name><surname>Salhi</surname>, <given-names>S.</given-names></string-name> (<year>2017</year>). <source>Heuristic Search: The Emerging Science of Problem Solving</source>. <publisher-name>Springer</publisher-name>, <publisher-loc>Cham, Switzerland</publisher-loc>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_037">
<mixed-citation publication-type="book"><string-name><surname>Snyman</surname>, <given-names>J.A.</given-names></string-name>, <string-name><surname>Wilke</surname>, <given-names>D.N.</given-names></string-name> (<year>2005</year>). <source>Practical Mathematical Optimization</source>. <publisher-name>Springer</publisher-name>, <publisher-loc>Cham, Switzerland</publisher-loc>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_038">
<mixed-citation publication-type="other"><string-name><surname>OpenEye Scientific Software</surname></string-name> (<year>2008</year>). <source>ROCS</source>. <publisher-loc>Santa Fe, NM</publisher-loc>. <uri>http://www.eyesopen.com</uri>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_039">
<mixed-citation publication-type="journal"><string-name><surname>Storn</surname>, <given-names>R.</given-names></string-name>, <string-name><surname>Price</surname>, <given-names>K.</given-names></string-name> (<year>1997</year>). <article-title>Differential evolution – a simple and efficient heuristic for global optimization over continuous spaces</article-title>. <source>Journal of Global Optimization</source>, <volume>11</volume>(<issue>4</issue>), <fpage>341</fpage>–<lpage>359</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_040">
<mixed-citation publication-type="chapter"><string-name><surname>Sudholt</surname>, <given-names>D.</given-names></string-name> (<year>2015</year>). <chapter-title>Parallel evolutionary algorithms</chapter-title>. In: <string-name><surname>Kacprzyk</surname>, <given-names>J.</given-names></string-name>, <string-name><surname>Pedrycz</surname>, <given-names>W.</given-names></string-name> (Eds.), <source>Springer Handbook of Computational Intelligence, Springer Handbooks</source>. <publisher-name>Springer</publisher-name>, <publisher-loc>Berlin, Heidelberg</publisher-loc>. <ext-link ext-link-type="doi" xlink:href="https://doi.org/10.1007/978-3-662-43505-2_46" xlink:type="simple">https://doi.org/10.1007/978-3-662-43505-2_46</ext-link>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_041">
<mixed-citation publication-type="journal"><string-name><surname>Sumudu</surname>, <given-names>P.</given-names></string-name>, <string-name><surname>Leelananda</surname>, <given-names>S.P.</given-names></string-name> (<year>2016</year>). <article-title>Computational methods in drug discovery</article-title>. <source>Beilstein Journal of Organic Chemistry</source>, <volume>12</volume>, <fpage>2694</fpage>–<lpage>2718</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_042">
<mixed-citation publication-type="other"><string-name><surname>The MathWorks Inc.</surname></string-name> (<year>2022</year>). <source>MATLAB Documentation</source>. <publisher-name>The MathWorks Inc.</publisher-name>, <publisher-loc>Natick, Massachusetts, United States</publisher-loc>. <uri>https://www.mathworks.com/help/matlab/</uri>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_043">
<mixed-citation publication-type="book"><string-name><surname>Trobec</surname>, <given-names>R.</given-names></string-name>, <string-name><surname>Slivnik</surname>, <given-names>B.</given-names></string-name>, <string-name><surname>Bulić</surname>, <given-names>P.</given-names></string-name>, <string-name><surname>Robič</surname>, <given-names>B.</given-names></string-name> (<year>2018</year>). <source>Introduction to Parallel Computing: From Algorithms to Programming on State-of-the-Art Platforms</source>. <publisher-name>Springer</publisher-name>, <publisher-loc>Cham, Switzerland</publisher-loc>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_044">
<mixed-citation publication-type="journal"><string-name><surname>Van Geit</surname>, <given-names>W.</given-names></string-name>, <string-name><surname>De Schutter</surname>, <given-names>E.</given-names></string-name>, <string-name><surname>Achard</surname>, <given-names>P.</given-names></string-name> (<year>2008</year>). <article-title>Automated neuron model optimization techniques: a review</article-title>. <source>Biological Cybernetics</source>, <volume>99</volume>, <fpage>241</fpage>–<lpage>251</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_045">
<mixed-citation publication-type="journal"><string-name><surname>Wang</surname>, <given-names>J.</given-names></string-name>, <string-name><surname>Zhang</surname>, <given-names>X.</given-names></string-name>, <string-name><surname>Omarini</surname>, <given-names>A.B.</given-names></string-name>, <string-name><surname>Li</surname>, <given-names>B.</given-names></string-name> (<year>2020</year>). <article-title>Virtual screening for functional foods against the main protease of SARS-CoV-2</article-title>. <source>Journal of Food Biochemistry</source>, <volume>44</volume>(<issue>11</issue>), <elocation-id>e13481</elocation-id>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_046">
<mixed-citation publication-type="journal"><string-name><surname>Wishart</surname>, <given-names>D.S.</given-names></string-name> (<year>2006</year>). <article-title>DrugBank: a comprehensive resource for in silico drug discovery and exploration</article-title>. <source>Nucleic Acids Research</source>, <volume>34</volume>, <fpage>D668</fpage>–<lpage>D672</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_047">
<mixed-citation publication-type="journal"><string-name><surname>Yan</surname>, <given-names>X.</given-names></string-name>, <string-name><surname>Li</surname>, <given-names>J.</given-names></string-name>, <string-name><surname>Liu</surname>, <given-names>Z.</given-names></string-name>, <string-name><surname>Zheng</surname>, <given-names>M.</given-names></string-name>, <string-name><surname>Ge</surname>, <given-names>H.</given-names></string-name>, <string-name><surname>Xu</surname>, <given-names>J.</given-names></string-name> (<year>2013</year>). <article-title>Enhancing molecular shape comparison by weighted Gaussian functions</article-title>. <source>Journal of Chemical Information and Modeling</source>, <volume>53</volume>, <fpage>1967</fpage>–<lpage>1978</lpage>.</mixed-citation>
</ref>
<ref id="j_infor535_ref_048">
<mixed-citation publication-type="journal"><string-name><surname>Zeng</surname>, <given-names>W.</given-names></string-name>, <string-name><surname>Guo</surname>, <given-names>L.</given-names></string-name>, <string-name><surname>Xu</surname>, <given-names>S.</given-names></string-name>, <string-name><surname>Chen</surname>, <given-names>J.</given-names></string-name>, <string-name><surname>Zhou</surname>, <given-names>J.</given-names></string-name> (<year>2020</year>). <article-title>High-throughput screening technology in industrial biotechnology</article-title>. <source>Trends in Biotechnology</source>, <volume>38</volume>, <fpage>888</fpage>–<lpage>906</lpage>.</mixed-citation>
</ref>
</ref-list>
</back>
</article>
