<?xml version="1.0" encoding="utf-8"?><!DOCTYPE article PUBLIC "-//NLM//DTD JATS (Z39.96) Journal Publishing DTD v1.0 20120330//EN" "JATS-journalpublishing1.dtd"><article xmlns:mml="http://www.w3.org/1998/Math/MathML" xmlns:xlink="http://www.w3.org/1999/xlink" article-type="research-article">
<front>
<journal-meta>
<journal-id journal-id-type="publisher-id">INFORMATICA</journal-id>
<journal-title-group><journal-title>Informatica</journal-title></journal-title-group>
<issn pub-type="epub">1822-8844</issn><issn pub-type="ppub">0868-4952</issn><issn-l>0868-4952</issn-l>
<publisher>
<publisher-name>Vilnius University</publisher-name>
</publisher>
</journal-meta>
<article-meta>
<article-id pub-id-type="publisher-id">INFO1192</article-id>
<article-id pub-id-type="doi">10.15388/Informatica.2018.176</article-id>
<article-categories><subj-group subj-group-type="heading">
<subject>Research Article</subject></subj-group></article-categories>
<title-group>
<article-title>The Modified Method of Logical Analysis Used for Solving Classification Problems</article-title>
</title-group>
<contrib-group>
<contrib contrib-type="author">
<name><surname>Kuzmich</surname><given-names>Roman</given-names></name><email xlink:href="romazmich@gmail.com">romazmich@gmail.com</email><xref ref-type="aff" rid="j_info1192_aff_001">1</xref><bio>
<p><bold>R. Kuzmich</bold> is a candidate of technical sciences, an associate professor of Siberian Federal University (Krasnoyarsk, Russia). His research interests are optimization techniques, modelling, control systems.</p></bio>
</contrib>
<contrib contrib-type="author">
<name><surname>Stupina</surname><given-names>Alena</given-names></name><email xlink:href="h677hm@gmail.com">h677hm@gmail.com</email><xref ref-type="aff" rid="j_info1192_aff_002">2</xref><xref ref-type="aff" rid="j_info1192_aff_003">3</xref><xref ref-type="corresp" rid="cor1">∗</xref><bio>
<p><bold>A. Stupina</bold> is a doctor of technical sciences, a professor of Siberian Federal University (Krasnoyarsk, Russia). Her research interests are <italic>n</italic>-version programming, modelling, control systems.</p></bio>
</contrib>
<contrib contrib-type="author">
<name><surname>Korpacheva</surname><given-names>Larisa</given-names></name><email xlink:href="korp_0777@mail.ru">korp_0777@mail.ru</email><xref ref-type="aff" rid="j_info1192_aff_002">2</xref><bio>
<p><bold>L. Korpacheva</bold> is a candidate of technical sciences, an associate professor of Siberian Federal University (Krasnoyarsk, Russia). Her research interests are modelling, system analysis.</p></bio>
</contrib>
<contrib contrib-type="author">
<name><surname>Ezhemanskaja</surname><given-names>Svetlana</given-names></name><email xlink:href="sve-ta_ezh@inbox.ru">sve-ta_ezh@inbox.ru</email><xref ref-type="aff" rid="j_info1192_aff_002">2</xref><bio>
<p><bold>S. Ezhemanskaja</bold> is a candidate of technical sciences, an associate professor of Siberian Federal University (Krasnoyarsk, Russia). Her research interests are modelling, system analysis.</p></bio>
</contrib>
<contrib contrib-type="author">
<name><surname>Rouiga</surname><given-names>Irina</given-names></name><email xlink:href="irina_rouiga@bk.ru">irina_rouiga@bk.ru</email><xref ref-type="aff" rid="j_info1192_aff_004">4</xref><bio>
<p><bold>I. Rouiga</bold> is a candidate of economical sciences, an associate professor of Siberian Federal University (Krasnoyarsk, Russia). Her research interests are economic-mathematical modelling, investment and innovation policy at the regional level.</p></bio>
</contrib>
<aff id="j_info1192_aff_001"><label>1</label>Department of Computer Science in Business, School of Business Management and Economics <institution>Siberian Federal University</institution>, Krasnoyarsk, <country>Russia</country></aff>
<aff id="j_info1192_aff_002"><label>2</label>Department of Economics and Information Technologies for Management, School of Business Management and Economics, <institution>Siberian Federal University</institution>, Krasnoyarsk, <country>Russia</country></aff>
<aff id="j_info1192_aff_003"><label>3</label>Department of International Management, <institution>Krasnoyarsk State Agrarian University</institution>, Krasnoyarsk, <country>Russia</country></aff>
<aff id="j_info1192_aff_004"><label>4</label>Department of Economics and Business Process Management, School of Business Management and Economics, <institution>Siberian Federal University</institution>, Krasnoyarsk, <country>Russia</country></aff>
</contrib-group>
<author-notes>
<corresp id="cor1"><label>∗</label>Corresponding author.</corresp>
</author-notes>
<pub-date pub-type="ppub"><year>2018</year></pub-date><pub-date pub-type="epub"><day>1</day><month>1</month><year>2018</year></pub-date><volume>29</volume><issue>3</issue><fpage>467</fpage><lpage>486</lpage><history><date date-type="received"><month>10</month><year>2017</year></date><date date-type="accepted"><month>6</month><year>2018</year></date></history>
<permissions><copyright-statement>© 2018 Vilnius University</copyright-statement><copyright-year>2018</copyright-year>
<license license-type="open-access" xlink:href="http://creativecommons.org/licenses/by/4.0/">
<license-p>Open access article under the <ext-link ext-link-type="uri" xlink:href="http://creativecommons.org/licenses/by/4.0/">CC BY</ext-link> license.</license-p></license></permissions>
<abstract>
<p>The study is dictated by the need to interpret and justify the solutions of classification problems. In this context, a method of logical analysis of data is considered along with its modifications based on the specifically developed algorithmic procedures, the use of which can increase the interpretability and generalization capability of classifiers. The article confirms in an empirical way that the suggested optimization models are suitable for building informative patterns and that the designed algorithmic procedures are efficient when used for the method of logical analysis of data.</p>
</abstract>
<kwd-group>
<label>Key words</label>
<kwd>classification</kwd>
<kwd>pattern</kwd>
<kwd>degree</kwd>
<kwd>coverage</kwd>
<kwd>information content</kwd>
</kwd-group>
</article-meta>
</front>
<body>
<sec id="j_info1192_s_001">
<label>1</label>
<title>Introduction</title>
<p>Working on solutions to today’s classification problems is often associated with a need for interpreting and justifying the obtained solutions, apart from ensuring their high accuracy. In particular, the interpretability and justification are key factors in finding the solutions to practical problems that threaten great losses in case of a wrong decision.</p>
<p>The latest survey studies in this field have shown that the most promising algorithms, from an interpretability standpoint, are the logical classification algorithms that formulate a decision rule in the form of a list of final rules (Kotsiantis, <xref ref-type="bibr" rid="j_info1192_ref_017">2007</xref>). It is worth noting the scientists who have made the greatest contributions to the development of logical classification algorithms: Yu. Zhuravlyov, K. Rudakov, K. Vorontsov, N. Zagoruyko, P.L. Hammer, T. Bonates, G. Alexe, S. Alexe, Y. Freund, R.E. Schapire.</p>
<p>The most promising research in this field is carried out at the Rutgers University, USA, where they have successfully found solutions to a range of problems, including in medical diagnosis and prediction, by using logical data analysis methods (Alexe <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1192_ref_001">2002</xref>; Brauner <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1192_ref_010">2004</xref>; Hammer and Bonates, <xref ref-type="bibr" rid="j_info1192_ref_012">2005</xref>). The acquired results demonstrate the efficiency of the selected approach whose evolution is arguably the foundation of modern decision support systems.</p>
<p>However, at the moment there is a range of challenges associated with the application of the method of logical analysis of data to solving practical classification problems. They include the problem of designing optimization models for building meaningful patterns. When looking into this issue, it is above all necessary to define the criteria and limitations that underpin such optimization models. Another challenge of the reviewed method is about building a classifier that could correctly attribute a new observation, i.e. the observation that was not involved in its creation, to the appropriate class. At this stage of method evolution, the primary task is to increase the interpretability of the classifier and the performance of the classification of new observations, that is, to improve the generalization capability of the classifier.</p>
<p>For the purpose of addressing the foregoing challenges, the article offers modifications to the method of logical analysis of data, which can improve the interpretability and generalization capability of the classifier.</p>
</sec>
<sec id="j_info1192_s_002">
<label>2</label>
<title>Method of Logical Analysis of Data</title>
<sec id="j_info1192_s_003">
<label>2.1</label>
<title>Approach Description</title>
<p>The study considers the classification problem of the following kind (Kuzmich and Masich, <xref ref-type="bibr" rid="j_info1192_ref_019">2014</xref>). There is a data set consisting of two disjoint sets <inline-formula id="j_info1192_ineq_001"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${\Omega ^{+}}$]]></tex-math></alternatives></inline-formula> and <inline-formula id="j_info1192_ineq_002"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${\Omega ^{-}}$]]></tex-math></alternatives></inline-formula> of <italic>n</italic>-dimensional vectors belonging to the positive and the negative class, respectively. The components of the vectors, also called attributes, can be both numeric (nominal) and binary (Stupina <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1192_ref_023">2012</xref>). The task is to subsume a certain new observation, also a vector of <italic>n</italic> variables, under the appropriate class.</p>
<p>The suggested data classification approach is based on the method originating from the theory of combinatorial optimization, which is called <italic>Logical Analysis of Data (LAD</italic>) (Hammer and Bonates, <xref ref-type="bibr" rid="j_info1192_ref_012">2005</xref>). This method has been usefully employed in solving a range of problems in various fields (Kuzmich and Masich, <xref ref-type="bibr" rid="j_info1192_ref_018">2012</xref>; Hammer <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1192_ref_013">2004a</xref>, <xref ref-type="bibr" rid="j_info1192_ref_014">2004b</xref>; Herrera and Subasi, <xref ref-type="bibr" rid="j_info1192_ref_015">2013</xref>). The key idea of the method is to apply a combination of “differentiation” and “integration” actions to a section of the space of original attributes containing the given positive and negative observations. The “differentiation” stage involves defining a family of small subsets sharing characteristic positive and negative features. At the “integration” stage, the unions of these subsets, created in a specific manner, are treated as the approximations of certain areas of the space of attributes consisting of positive and, consequently, negative observations (Kuzmich and Masich, <xref ref-type="bibr" rid="j_info1192_ref_019">2014</xref>).</p>
<p>The sequence of steps for this method is as follows (Hammer and Bonates, <xref ref-type="bibr" rid="j_info1192_ref_012">2005</xref>):</p>
<p>a) To remove redundant variables in the original data set, a subset <italic>S</italic> is singled out from the set of variables to help to distinguish positive observations from the negative ones. The further steps of the method utilize the projections <inline-formula id="j_info1192_ineq_003"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${\Omega _{s}^{+}}$]]></tex-math></alternatives></inline-formula> and <inline-formula id="j_info1192_ineq_004"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${\Omega _{s}^{-}}$]]></tex-math></alternatives></inline-formula> of the sets <inline-formula id="j_info1192_ineq_005"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${\Omega ^{+}}$]]></tex-math></alternatives></inline-formula> and <inline-formula id="j_info1192_ineq_006"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${\Omega ^{-}}$]]></tex-math></alternatives></inline-formula> on <italic>S</italic>.</p>
<p>b) The <inline-formula id="j_info1192_ineq_007"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${\Omega _{s}^{+}}$]]></tex-math></alternatives></inline-formula> set is covered with a family of similar subsets of a smaller space, each of which significantly overlaps with <inline-formula id="j_info1192_ineq_008"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${\Omega _{s}^{+}}$]]></tex-math></alternatives></inline-formula>, but does not overlap with <inline-formula id="j_info1192_ineq_009"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${\Omega _{s}^{-}}$]]></tex-math></alternatives></inline-formula>; alternatively, a minor overlapping with <inline-formula id="j_info1192_ineq_010"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${\Omega _{s}^{-}}$]]></tex-math></alternatives></inline-formula> is acceptable if it results in a greater overlapping with <inline-formula id="j_info1192_ineq_011"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${\Omega _{s}^{+}}$]]></tex-math></alternatives></inline-formula>. Such subsets are called “positive patterns.” In a similar fashion, the <inline-formula id="j_info1192_ineq_012"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${\Omega _{s}^{-}}$]]></tex-math></alternatives></inline-formula> set is covered with “negative patterns.”</p>
<p>c) Then it is necessary to identify the subset of positive patterns whose union covers all <inline-formula id="j_info1192_ineq_013"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${\Omega _{s}^{+}}$]]></tex-math></alternatives></inline-formula> observations and the subset of negative patterns whose union covers all <inline-formula id="j_info1192_ineq_014"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${\Omega _{s}^{-}}$]]></tex-math></alternatives></inline-formula> observations.</p>
<p>d) The fact of whether a certain observation is covered by the union of the two subsets, which are either positive or negative, is then determined using a classifier built on these subsets.</p>
</sec>
<sec id="j_info1192_s_004">
<label>2.2</label>
<title>Binarization of Attributes</title>
<p>The studied method is intended for use with data sets of binary attributes. Since the original data set can include attributes of various types, it is necessary to binarize them.</p>
<p>One of the simplest binarization methods suggests linking each metric variable to a number of binary variables. A binary variable is assigned 1 if the value of the corresponding metric variable exceeds a certain threshold value, and vice versa. This method is referred to in Rastrigin and Freymanis (<xref ref-type="bibr" rid="j_info1192_ref_021">1988</xref>) as “unitary”. Its flaw lies in the fact that it implies having numerous combinations of binary variables that cannot be linked to any points in the original space <inline-formula id="j_info1192_ineq_015"><alternatives><mml:math>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">n</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>−</mml:mo>
<mml:mi mathvariant="italic">n</mml:mi>
<mml:mo>−</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[$({2^{n}}-n-1)$]]></tex-math></alternatives></inline-formula>. This flaw makes it difficult to use this method for coding the variable arguments of criterion functions when solving optimization problems, as it will generate a great number of invalid solutions. However, in this case, it does not matter as far as classification is concerned, because the binary variables are obtained by coding the predefined metric variables. The main advantage of this method though is the fact that the distances across the original and binary spaces are equal. It means that points closely spaced in the original space will also stay in proximity of each other in the binarized space. This, in its turn, makes it possible, as early as at the binarization stage, to minimize the number of thresholds by mapping close values of the original variable with the equivalent values within the binary space (provided that the positive and negative subsets of observations remain disjoint) (Hammer and Bonates, <xref ref-type="bibr" rid="j_info1192_ref_012">2005</xref>).</p>
<p>There also exists another binarization method, referenced in Vorontsov (<xref ref-type="bibr" rid="j_info1192_ref_026">2010</xref>).</p>
<p>An arbitrary attribute <inline-formula id="j_info1192_ineq_016"><alternatives><mml:math>
<mml:mi mathvariant="italic">f</mml:mi>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="italic">X</mml:mi>
<mml:mo stretchy="false">↦</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">f</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[$f:X\mapsto {D_{f}}$]]></tex-math></alternatives></inline-formula> creates terms verifying that the value of <inline-formula id="j_info1192_ineq_017"><alternatives><mml:math>
<mml:mi mathvariant="italic">f</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[$f(x)$]]></tex-math></alternatives></inline-formula> falls into certain subsets of the <inline-formula id="j_info1192_ineq_018"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">f</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${D_{f}}$]]></tex-math></alternatives></inline-formula> set. Some typical structures of this kind are provided in Vorontsov (<xref ref-type="bibr" rid="j_info1192_ref_026">2010</xref>).</p>
<list>
<list-item id="j_info1192_li_001">
<label>–</label>
<p>If <italic>f</italic> is a nominal attribute: 
<disp-formula id="j_info1192_eq_001">
<alternatives><mml:math display="block">
<mml:mtable displaystyle="true" columnalign="left">
<mml:mtr>
<mml:mtd class="align-odd">
<mml:mi mathvariant="italic">β</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mo maxsize="1.19em" minsize="1.19em" fence="true">[</mml:mo>
<mml:mi mathvariant="italic">f</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mi mathvariant="italic">d</mml:mi>
<mml:mo maxsize="1.19em" minsize="1.19em" fence="true">]</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="1em"/>
<mml:mi mathvariant="italic">d</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="align-odd">
<mml:mi mathvariant="italic">β</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mo maxsize="1.19em" minsize="1.19em" fence="true">[</mml:mo>
<mml:mi mathvariant="italic">f</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>′</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo maxsize="1.19em" minsize="1.19em" fence="true">]</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="1em"/>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>′</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="false">⊂</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[\begin{array}{l}\displaystyle \beta (x)=\big[f(x)=d\big],\hspace{1em}d\in {D_{f}},\\ {} \displaystyle \beta (x)=\big[f(x)\in {D^{\prime }}\big],\hspace{1em}{D^{\prime }}\subset {D_{f}}.\end{array}\]]]></tex-math></alternatives>
</disp-formula>
</p>
</list-item>
<list-item id="j_info1192_li_002">
<label>–</label>
<p>If <italic>f</italic> is an ordinal or quantitative attribute: 
<disp-formula id="j_info1192_eq_002">
<alternatives><mml:math display="block">
<mml:mtable displaystyle="true" columnalign="left">
<mml:mtr>
<mml:mtd class="align-odd">
<mml:mi mathvariant="italic">β</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mo maxsize="1.19em" minsize="1.19em" fence="true">[</mml:mo>
<mml:mi mathvariant="italic">f</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>⩽</mml:mo>
<mml:mi mathvariant="italic">d</mml:mi>
<mml:mo maxsize="1.19em" minsize="1.19em" fence="true">]</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="1em"/>
<mml:mi mathvariant="italic">d</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="align-odd">
<mml:mi mathvariant="italic">β</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mo maxsize="1.19em" minsize="1.19em" fence="true">[</mml:mo>
<mml:mi mathvariant="italic">d</mml:mi>
<mml:mo>⩽</mml:mo>
<mml:mi mathvariant="italic">f</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>⩽</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>′</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo maxsize="1.19em" minsize="1.19em" fence="true">]</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="1em"/>
<mml:mi mathvariant="italic">d</mml:mi>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>′</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">D</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">f</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="2.5pt"/>
<mml:mi mathvariant="italic">d</mml:mi>
<mml:mo mathvariant="normal">&lt;</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>′</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[\begin{array}{l}\displaystyle \beta (x)=\big[f(x)\leqslant d\big],\hspace{1em}d\in {D_{f}},\\ {} \displaystyle \beta (x)=\big[d\leqslant f(x)\leqslant {d^{\prime }}\big],\hspace{1em}d,{d^{\prime }}\in {D_{f}},\hspace{2.5pt}d<{d^{\prime }}.\end{array}\]]]></tex-math></alternatives>
</disp-formula>
</p>
</list-item>
</list>
<p>For a quantitative attribute <inline-formula id="j_info1192_ineq_019"><alternatives><mml:math>
<mml:mi mathvariant="italic">f</mml:mi>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="italic">X</mml:mi>
<mml:mo stretchy="false">→</mml:mo>
<mml:mi mathvariant="italic">R</mml:mi></mml:math><tex-math><![CDATA[$f:X\to R$]]></tex-math></alternatives></inline-formula>, it is necessary to only consider those threshold values <italic>d</italic> that divide the <inline-formula id="j_info1192_ineq_020"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">X</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>ℓ</mml:mi>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${X^{\ell }}$]]></tex-math></alternatives></inline-formula> set in different ways. After excluding trivial dissections converting <inline-formula id="j_info1192_ineq_021"><alternatives><mml:math>
<mml:mi mathvariant="italic">β</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[$\beta (x)$]]></tex-math></alternatives></inline-formula> to 0 or 1 across the whole set, the remaining number of such values will not exceed <inline-formula id="j_info1192_ineq_022"><alternatives><mml:math>
<mml:mi>ℓ</mml:mi>
<mml:mo>−</mml:mo>
<mml:mn>1</mml:mn></mml:math><tex-math><![CDATA[$\ell -1$]]></tex-math></alternatives></inline-formula>. For instance, it is possible to take thresholds of the following kind: 
<disp-formula id="j_info1192_eq_003">
<label>(1)</label><alternatives><mml:math display="block">
<mml:mtable displaystyle="true" columnalign="right">
<mml:mtr>
<mml:mtd class="align-odd">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo><mml:mstyle displaystyle="true">
<mml:mfrac>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>+</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:mfrac>
</mml:mstyle>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="2em"/>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="false">≠</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="1em"/>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mo>…</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mi>ℓ</mml:mi>
<mml:mo>−</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ {d_{i}}=\frac{{f^{(i)}}+{f^{(i+1)}}}{2},\hspace{2em}{f^{(i)}}\ne {f^{(i+1)}},\hspace{1em}i=1,\dots ,\ell -1,\]]]></tex-math></alternatives>
</disp-formula> 
where <inline-formula id="j_info1192_ineq_023"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>⩽</mml:mo>
<mml:mo stretchy="false">⋯</mml:mo>
<mml:mo>⩽</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">f</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi>ℓ</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${f^{(1)}}\leqslant \cdots \leqslant {f^{(\ell )}}$]]></tex-math></alternatives></inline-formula> is a sequence of values of the <italic>f</italic> attribute throughout the observations of the set <inline-formula id="j_info1192_ineq_024"><alternatives><mml:math>
<mml:mi mathvariant="italic">f</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mo>…</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mi mathvariant="italic">f</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi>ℓ</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[$f({x_{1}}),\dots ,f({x_{\ell }})$]]></tex-math></alternatives></inline-formula>, sorted in ascending order.</p>
<p>Should the resulting terms be later intended for the synthesis of conjunctions, it is recommended to pick the most informative ones right away, to cut down on the iterations of sequential search. With ordinal and quantitative attributes, such a problem is solved through the optimal partitioning of the range of attribute values into zones. The process of such partitioning is described below.</p>
<p>Suppose <inline-formula id="j_info1192_ineq_025"><alternatives><mml:math>
<mml:mi mathvariant="italic">f</mml:mi>
<mml:mo>:</mml:mo>
<mml:mi mathvariant="italic">X</mml:mi>
<mml:mo stretchy="false">→</mml:mo>
<mml:mi mathvariant="italic">R</mml:mi></mml:math><tex-math><![CDATA[$f:X\to R$]]></tex-math></alternatives></inline-formula> is a quantitative attribute, <inline-formula id="j_info1192_ineq_026"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mo>…</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">r</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${d_{1}},\dots ,{d_{r}}$]]></tex-math></alternatives></inline-formula> is an ascending sequence of thresholds. Let us define the zones containing the values of the <italic>f</italic> attribute as terms of the following kind: 
<disp-formula id="j_info1192_eq_004">
<alternatives><mml:math display="block">
<mml:mtable displaystyle="true" columnalign="left">
<mml:mtr>
<mml:mtd class="align-odd">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">ε</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mo maxsize="1.19em" minsize="1.19em" fence="true">[</mml:mo>
<mml:mi mathvariant="italic">f</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo mathvariant="normal">&lt;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo maxsize="1.19em" minsize="1.19em" fence="true">]</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="align-odd">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">ε</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mo maxsize="1.19em" minsize="1.19em" fence="true">[</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>⩽</mml:mo>
<mml:mi mathvariant="italic">f</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo mathvariant="normal">&lt;</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo maxsize="1.19em" minsize="1.19em" fence="true">]</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="1em"/>
<mml:mi mathvariant="italic">s</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mo>…</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mi mathvariant="italic">r</mml:mi>
<mml:mo>−</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="align-odd">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">ε</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mo maxsize="1.19em" minsize="1.19em" fence="true">[</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">d</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">r</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>⩽</mml:mo>
<mml:mi mathvariant="italic">f</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo maxsize="1.19em" minsize="1.19em" fence="true">]</mml:mo>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[\begin{array}{l}\displaystyle {\varepsilon _{0}}(x)=\big[f(x)<{d_{1}}\big],\\ {} \displaystyle {\varepsilon _{s}}(x)=\big[{d_{s}}\leqslant f(x)<{d_{s+1}}\big],\hspace{1em}s=1,\dots ,r-1,\\ {} \displaystyle {\varepsilon _{r}}(x)=\big[{d_{r}}\leqslant f(x)\big].\end{array}\]]]></tex-math></alternatives>
</disp-formula>
</p>
<p>For example, a greedy algorithm of zone merging starts with dividing them into “small zones.” The thresholds are calculated according to formula (<xref rid="j_info1192_eq_003">1</xref>) and pass through all the pairs of points <inline-formula id="j_info1192_ineq_027"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo>−</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${x_{i-1}}$]]></tex-math></alternatives></inline-formula>, <inline-formula id="j_info1192_ineq_028"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${x_{i}}$]]></tex-math></alternatives></inline-formula>, of which exactly one belongs to class <italic>k</italic>.</p>
<p>The initial division comprises alternating zones defined as “only <italic>k</italic> – only not <italic>k</italic>”. Later the zones can be consolidated through merging triple points of adjacent zones. It is important to merge specifically triple points, since merging pairs will disrupt the alternation of “<italic>k</italic> – not <italic>k</italic>”, resulting in some “small zones” remaining unmerged in the end. The algorithm of merging zones stops when either of the following criteria is satisfied: a specific number <italic>r</italic> of zones has been reached; or certain original zones <inline-formula id="j_info1192_ineq_029"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">ε</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo>−</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${\varepsilon _{i-1}}$]]></tex-math></alternatives></inline-formula>, <inline-formula id="j_info1192_ineq_030"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">ε</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${\varepsilon _{i}}$]]></tex-math></alternatives></inline-formula> and <inline-formula id="j_info1192_ineq_031"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">ε</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${\varepsilon _{i+1}}$]]></tex-math></alternatives></inline-formula> start containing more information than the corresponding merged zone <inline-formula id="j_info1192_ineq_032"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">ε</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo>−</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>∨</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">ε</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>∨</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">ε</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${\varepsilon _{i-1}}\vee {\varepsilon _{i}}\vee {\varepsilon _{i+1}}$]]></tex-math></alternatives></inline-formula>. The three points to merge are selected so as to achieve the maximum gain in information content after the merger.</p>
</sec>
<sec id="j_info1192_s_005">
<label>2.3</label>
<title>Building a Support Set</title>
<p>Representing an excessively large number of attributes in a set can be associated with an enormous computational load. This is the case, for example, in genomics and proteomics, the two most rapidly progressing areas of bioinformatics where the expression for the level of intensity of thousands, if not tens of thousands, of genes or proteins is included into the data set, despite the fact that even the smallest subset of these attributes is sufficient to perform an excellent separation of positive and negative observations (Alexe <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1192_ref_001">2002</xref>). One of the factors that makes it more difficult to extract an informative subset of attributes is the fact that there is a pronounced difference between the information content of individual attributes and the information content of a set of attributes.</p>
<p>It is necessary to devise some approaches to the identification of a subset of attributes that can help separate, with a high degree of accuracy, the positive and negative observations.</p>
<p>One such approach, based on the selection of a subset of attributes via building an optimization model in the form of a combinatorial optimization task, is provided here.</p>
<p>A set <italic>S</italic> of attributes is called a support set if the projection <inline-formula id="j_info1192_ineq_033"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${\Omega _{s}^{+}}$]]></tex-math></alternatives></inline-formula> of the set <inline-formula id="j_info1192_ineq_034"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${\Omega ^{+}}$]]></tex-math></alternatives></inline-formula> on <italic>S</italic> does not intersect with the projection <inline-formula id="j_info1192_ineq_035"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${\Omega _{s}^{-}}$]]></tex-math></alternatives></inline-formula> of the set <inline-formula id="j_info1192_ineq_036"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${\Omega ^{-}}$]]></tex-math></alternatives></inline-formula> on <italic>S</italic>. The entire set of attributes is a support set since <inline-formula id="j_info1192_ineq_037"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${\Omega ^{+}}$]]></tex-math></alternatives></inline-formula> and <inline-formula id="j_info1192_ineq_038"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${\Omega ^{-}}$]]></tex-math></alternatives></inline-formula> originally do not intersect. A support set can be called minimal, when the elimination of any remaining variable from it leads to a data set in which some positive and negative observations are identical.</p>
<p>In order to find the minimal support set, one needs to assign to each attribute <inline-formula id="j_info1192_ineq_039"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${x_{i}}$]]></tex-math></alternatives></inline-formula>, <inline-formula id="j_info1192_ineq_040"><alternatives><mml:math>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mo>…</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mi mathvariant="italic">t</mml:mi></mml:math><tex-math><![CDATA[$i=1,\dots ,t$]]></tex-math></alternatives></inline-formula> of the binary set a new binary variable <inline-formula id="j_info1192_ineq_041"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${y_{i}}$]]></tex-math></alternatives></inline-formula>, which is equal to 1 if <inline-formula id="j_info1192_ineq_042"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${x_{i}}$]]></tex-math></alternatives></inline-formula> belongs to the support set, and to 0 otherwise. One denotes the binary vector associated with positive observations as <inline-formula id="j_info1192_ineq_043"><alternatives><mml:math>
<mml:mi mathvariant="italic">U</mml:mi>
<mml:mo>=</mml:mo>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mo>…</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[$U=({u_{1}},{u_{2}},\dots ,{u_{t}})$]]></tex-math></alternatives></inline-formula> and the one associated with negative observations as <inline-formula id="j_info1192_ineq_044"><alternatives><mml:math>
<mml:mi mathvariant="italic">V</mml:mi>
<mml:mo>=</mml:mo>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mo>…</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">t</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[$V=({v_{1}},{v_{2}},\dots ,{v_{t}})$]]></tex-math></alternatives></inline-formula>. A new variable is then introduced: 
<disp-formula id="j_info1192_eq_005">
<alternatives><mml:math display="block">
<mml:mtable displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">U</mml:mi>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mi mathvariant="italic">V</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mfenced separators="" open="{" close="">
<mml:mrow>
<mml:mtable columnspacing="4.0pt" equalrows="false" columnlines="none" equalcolumns="false" columnalign="left left">
<mml:mtr>
<mml:mtd class="array">
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="1em"/>
</mml:mtd>
<mml:mtd class="array">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">≠</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="array">
<mml:mn>0</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="1em"/>
</mml:mtd>
<mml:mtd class="array">
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">u</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">v</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ {w_{i}}(U,V)=\left\{\begin{array}{l@{\hskip4.0pt}l}1,\hspace{1em}& {u_{i}}\ne {v_{i}},\\ {} 0,\hspace{1em}& {u_{i}}={v_{i}}.\end{array}\right.\]]]></tex-math></alternatives>
</disp-formula>
</p>
<p>The separability of the sets <inline-formula id="j_info1192_ineq_045"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${\Omega _{s}^{+}}$]]></tex-math></alternatives></inline-formula> and <inline-formula id="j_info1192_ineq_046"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${\Omega _{s}^{-}}$]]></tex-math></alternatives></inline-formula> is then conditioned by holding the inequation <inline-formula id="j_info1192_ineq_047"><alternatives><mml:math>
<mml:mo largeop="false" movablelimits="false">∑</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">U</mml:mi>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mi mathvariant="italic">V</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>⩾</mml:mo>
<mml:mn>1</mml:mn></mml:math><tex-math><![CDATA[$\textstyle\sum {w_{i}}(U,V){y_{i}}\geqslant 1$]]></tex-math></alternatives></inline-formula> for any <inline-formula id="j_info1192_ineq_048"><alternatives><mml:math>
<mml:mi mathvariant="italic">U</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[$U\in {\Omega _{S}^{+}}$]]></tex-math></alternatives></inline-formula> and <inline-formula id="j_info1192_ineq_049"><alternatives><mml:math>
<mml:mi mathvariant="italic">V</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[$V\in {\Omega _{S}^{-}}$]]></tex-math></alternatives></inline-formula>.</p>
<p>To ensure that the data set is more resistant to any errors occurring during the measurements which produce those data, this condition should be made stricter by replacing 1 in the right side of the inequation with a certain integer <italic>d</italic>. This means that the positive and negative observations should differ by at least <italic>d</italic> attributes.</p>
<p>Therefore, the problem of minimizing a support set can be formulated as a conditional pseudo-Boolean optimization problem: 
<disp-formula id="j_info1192_eq_006">
<alternatives><mml:math display="block">
<mml:mtable displaystyle="true" columnalign="left">
<mml:mtr>
<mml:mtd class="align-odd">
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mo largeop="true" movablelimits="false">∑</mml:mo></mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">t</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">→</mml:mo>
<mml:mo movablelimits="false">min</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="align-odd">
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mo largeop="true" movablelimits="false">∑</mml:mo></mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">t</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">w</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">U</mml:mi>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mi mathvariant="italic">V</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>⩾</mml:mo>
<mml:mi mathvariant="italic">d</mml:mi>
<mml:mspace width="1em"/>
<mml:mtext>for any</mml:mtext>
<mml:mspace width="2.5pt"/>
<mml:mi mathvariant="italic">U</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mspace width="2.5pt"/>
<mml:mtext>and</mml:mtext>
<mml:mspace width="2.5pt"/>
<mml:mi mathvariant="italic">V</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[\begin{array}{l}\displaystyle {\sum \limits_{j=1}^{t}}{y_{j}}\to \min ,\\ {} \displaystyle {\sum \limits_{i=1}^{t}}{w_{i}}(U,V){y_{i}}\geqslant d\hspace{1em}\text{for any}\hspace{2.5pt}U\in {\Omega _{s}^{+}}\hspace{2.5pt}\text{and}\hspace{2.5pt}V\in {\Omega _{s}^{-}},\end{array}\]]]></tex-math></alternatives>
</disp-formula> 
where <inline-formula id="j_info1192_ineq_050"><alternatives><mml:math>
<mml:mi mathvariant="italic">y</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo fence="true" stretchy="false">{</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo fence="true" stretchy="false">}</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">t</mml:mi>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[$y\in {\{0,1\}^{t}}$]]></tex-math></alternatives></inline-formula>.</p>
<p>The objective function of this problem is a unimodal, monotonic, pseudo-Boolean function (Antamoshkin and Masich, <xref ref-type="bibr" rid="j_info1192_ref_003">2007a</xref>, <xref ref-type="bibr" rid="j_info1192_ref_004">2007b</xref>; Antamoshkin and Semenkin, <xref ref-type="bibr" rid="j_info1192_ref_005">1998</xref>), i.e. it has a single absolute minimum located at the point <inline-formula id="j_info1192_ineq_051"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mo>…</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[${y_{0}}=(0,0,\dots ,0)$]]></tex-math></alternatives></inline-formula> and its output increases as it gets further from the point of minimum (when any of its components changes from 0 to 1). The constraint function is also a unimodal, monotonic, pseudo-Boolean function, besides, it is defined using an algorithm, since its calculation requires iterating through all possible pairs of positive and negative observations.</p>
<p>An alternative approach to selecting the attributes is the specially designed algorithmic procedure, which is based on evaluating the importance of the given attributes and helps to obtain a reduced set (Kuzmich and Masich, <xref ref-type="bibr" rid="j_info1192_ref_019">2014</xref>).</p>
<p>The importance of any attribute is estimated against the frequency of its inclusion into the patterns involved in the classifier (Brauner <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1192_ref_010">2004</xref>). Therefore, the more often the attribute is found in the resulting patterns, the more important it is. Those attributes that cannot be found or are rarely involved in building patterns are considered unimportant.</p>
<p>The algorithmic procedure for generating a reduced set of attributes consists of four stages:</p>
<p>The first stage of the procedure for generating a reduced set of attributes involves conducting a classification of the entire set of attributes in order to determine the importance of each attribute.</p>
<p>The second stage requires a researcher to set an importance threshold as a reference against which it is possible to assess the importance of an individual attribute.</p>
<p>The third stage is about sorting the attributes by their importance and identifying those attributes whose importance value turned out to be beneath the specified threshold.</p>
<p>The fourth stage consists in excluding the attributes singled out at the third stage from consideration. The remaining attributes will combine to the reduced set. In this way, by applying varied importance thresholds, the researcher can obtain different reduced sets of attributes, which can later be used to build patterns.</p>
</sec>
<sec id="j_info1192_s_006">
<label>2.4</label>
<title>Building Patterns</title>
<p>The concept of patterns lies at the core of the reviewed approach. A positive pattern is defined as a subcube of a set of Boolean variables <inline-formula id="j_info1192_ineq_052"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="italic">B</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">t</mml:mi>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${B_{2}^{t}}$]]></tex-math></alternatives></inline-formula> that intersects with the set <inline-formula id="j_info1192_ineq_053"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${\Omega _{s}^{+}}$]]></tex-math></alternatives></inline-formula> and does not share elements with the set <inline-formula id="j_info1192_ineq_054"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${\Omega _{s}^{-}}$]]></tex-math></alternatives></inline-formula>. A negative pattern is formed in a similar fashion. A positive <italic>a</italic>-pattern for <inline-formula id="j_info1192_ineq_055"><alternatives><mml:math>
<mml:mi mathvariant="italic">a</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo fence="true" stretchy="false">{</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo fence="true" stretchy="false">}</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">t</mml:mi>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[$a\in {\{0,1\}^{t}}$]]></tex-math></alternatives></inline-formula> is a pattern that contains point <italic>a</italic>. For every point <inline-formula id="j_info1192_ineq_056"><alternatives><mml:math>
<mml:mi mathvariant="italic">a</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[$a\in {\Omega _{s}^{+}}$]]></tex-math></alternatives></inline-formula>, let us find the maximal <italic>a</italic>-pattern, i.e. the one covering the greatest number of points <inline-formula id="j_info1192_ineq_057"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${\Omega _{s}^{+}}$]]></tex-math></alternatives></inline-formula> (Kuzmich and Masich, <xref ref-type="bibr" rid="j_info1192_ref_019">2014</xref>).</p>
<p>The corresponding subcube is defined using <inline-formula id="j_info1192_ineq_058"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${y_{j}}$]]></tex-math></alternatives></inline-formula> variables: 
<disp-formula id="j_info1192_eq_007">
<alternatives><mml:math display="block">
<mml:mtable displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfenced separators="" open="{" close="">
<mml:mrow>
<mml:mtable columnspacing="4.0pt" equalrows="false" columnlines="none" equalcolumns="false" columnalign="left left">
<mml:mtr>
<mml:mtd class="array">
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="1em"/>
</mml:mtd>
<mml:mtd class="array">
<mml:mtext>if the</mml:mtext>
<mml:mspace width="2.5pt"/>
<mml:mi mathvariant="italic">j</mml:mi>
<mml:mtext>-th attribute is located in the subcube</mml:mtext>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="array">
<mml:mn>0</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="1em"/>
</mml:mtd>
<mml:mtd class="array">
<mml:mtext>otherwise</mml:mtext>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ {y_{j}}=\left\{\begin{array}{l@{\hskip4.0pt}l}1,\hspace{1em}& \text{if the}\hspace{2.5pt}j\text{-th attribute is located in the subcube},\\ {} 0,\hspace{1em}& \text{otherwise}.\end{array}\right.\]]]></tex-math></alternatives>
</disp-formula>
</p>
<p>That is, by fixing <italic>l</italic> variables of the original cube with <italic>t</italic> dimensions, we obtain a subcube with <inline-formula id="j_info1192_ineq_059"><alternatives><mml:math>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">t</mml:mi>
<mml:mo>−</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[$(t-l)$]]></tex-math></alternatives></inline-formula> dimensions and <inline-formula id="j_info1192_ineq_060"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">t</mml:mi>
<mml:mo>−</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${2^{t-l}}$]]></tex-math></alternatives></inline-formula> points.</p>
<p>The condition stipulating that a positive pattern should not contain any points from <inline-formula id="j_info1192_ineq_061"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[${\Omega _{s}^{-}}$]]></tex-math></alternatives></inline-formula> demands that for each observation <inline-formula id="j_info1192_ineq_062"><alternatives><mml:math>
<mml:mi mathvariant="italic">b</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[$b\in {\Omega _{s}^{-}}$]]></tex-math></alternatives></inline-formula> the <inline-formula id="j_info1192_ineq_063"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${y_{j}}$]]></tex-math></alternatives></inline-formula> variable is equal to 1 at least for one <italic>j</italic>, where <inline-formula id="j_info1192_ineq_064"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">≠</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${b_{j}}\ne {a_{j}}$]]></tex-math></alternatives></inline-formula>: 
<disp-formula id="j_info1192_eq_008">
<alternatives><mml:math display="block">
<mml:mtable displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mo largeop="true" movablelimits="false">∑</mml:mo></mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mfrac linethickness="0">
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">≠</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">t</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>⩾</mml:mo>
<mml:mn>1</mml:mn>
<mml:mspace width="1em"/>
<mml:mtext>for any</mml:mtext>
<mml:mspace width="2.5pt"/>
<mml:mi mathvariant="italic">b</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ {\sum \limits_{\genfrac{}{}{0pt}{}{j=1}{{b_{j}}\ne {a_{j}}}}^{t}}{y_{j}}\geqslant 1\hspace{1em}\text{for any}\hspace{2.5pt}b\in {\Omega _{s}^{-}}.\]]]></tex-math></alternatives>
</disp-formula>
</p>
<p>The limitation can be made stricter to help increase error resistance, in which case the number 1 on the right-hand side of the inequality should be replaced with a positive integer <italic>d</italic>.</p>
<p>On the other hand, a positive observation <inline-formula id="j_info1192_ineq_065"><alternatives><mml:math>
<mml:mi mathvariant="italic">c</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[$c\in {\Omega _{s}^{+}}$]]></tex-math></alternatives></inline-formula> will belong to the considered subcube only if the <inline-formula id="j_info1192_ineq_066"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${y_{j}}$]]></tex-math></alternatives></inline-formula> variable is equal to 0 for all indices <italic>j</italic>, where <inline-formula id="j_info1192_ineq_067"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">≠</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${c_{j}}\ne {a_{j}}$]]></tex-math></alternatives></inline-formula>. In this manner, the number of positive observations covered by the <italic>a</italic>-pattern can be calculated using the following formula: 
<disp-formula id="j_info1192_eq_009">
<alternatives><mml:math display="block">
<mml:mtable displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:munder>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mo largeop="true" movablelimits="false">∑</mml:mo></mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">c</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:munder>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mo largeop="true" movablelimits="false">∏</mml:mo></mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mfrac linethickness="0">
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">≠</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">t</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>−</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ \sum \limits_{c\in {\Omega _{s}^{+}}}{\prod \limits_{\genfrac{}{}{0pt}{}{j=1}{{c_{j}}\ne {a_{j}}}}^{t}}(1-{y_{j}}).\]]]></tex-math></alternatives>
</disp-formula>
</p>
<p>Therefore, the task of building patterns is reduced to a conditional pseudo-Boolean optimization problem with algorithmically defined functions (Bonates <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1192_ref_008">2006</xref>; Hammer <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1192_ref_013">2004a</xref>, <xref ref-type="bibr" rid="j_info1192_ref_014">2004b</xref>; Hwang and Choi, <xref ref-type="bibr" rid="j_info1192_ref_016">2015</xref>): <disp-formula-group id="j_info1192_dg_001">
<disp-formula id="j_info1192_eq_010">
<label>(2)</label><alternatives><mml:math display="block">
<mml:mtable displaystyle="true" columnalign="left">
<mml:mtr>
<mml:mtd class="align-odd">
<mml:munder>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mo largeop="true" movablelimits="false">∑</mml:mo></mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">c</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:munder>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mo largeop="true" movablelimits="false">∏</mml:mo></mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mfrac linethickness="0">
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">≠</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">t</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>−</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo stretchy="false">→</mml:mo>
<mml:mo movablelimits="false">max</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ \sum \limits_{c\in {\Omega _{s}^{+}}}{\prod \limits_{\genfrac{}{}{0pt}{}{j=1}{{c_{j}}\ne {a_{j}}}}^{t}}(1-{y_{j}})\to \max ,\]]]></tex-math></alternatives>
</disp-formula>
<disp-formula id="j_info1192_eq_011">
<label>(3)</label><alternatives><mml:math display="block">
<mml:mtable displaystyle="true" columnalign="left">
<mml:mtr>
<mml:mtd class="align-odd">
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mo largeop="true" movablelimits="false">∑</mml:mo></mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mfrac linethickness="0">
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">≠</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">t</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>⩾</mml:mo>
<mml:mi mathvariant="italic">d</mml:mi>
<mml:mspace width="1em"/>
<mml:mtext>for any</mml:mtext>
<mml:mspace width="2.5pt"/>
<mml:mi mathvariant="italic">b</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msubsup>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="2.5pt"/>
<mml:mi mathvariant="italic">y</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo fence="true" stretchy="false">{</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo fence="true" stretchy="false">}</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">t</mml:mi>
</mml:mrow>
</mml:msup>
<mml:mo>.</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ {\sum \limits_{\genfrac{}{}{0pt}{}{j=1}{{b_{j}}\ne {a_{j}}}}^{t}}{y_{j}}\geqslant d\hspace{1em}\text{for any}\hspace{2.5pt}b\in {\Omega _{s}^{-}},\hspace{2.5pt}y\in {\{0,1\}^{t}}.\]]]></tex-math></alternatives>
</disp-formula>
</disp-formula-group></p>
<p>The objective function (<xref rid="j_info1192_eq_010">2</xref>) and the constraint function (<xref rid="j_info1192_eq_011">3</xref>) in this problem are both unimodal, monotonic pseudo-Boolean functions.</p>
<p>The task of finding the maximal negative patterns is solved in a similar fashion.</p>
<p>Each identified pattern is characterized by its coverage – the number of captured observations within the corresponding class, and its degree – the number of fixed variables that determine this pattern. According to the above optimization model (2)–(3), the resulting patterns do not cover any observations from the other class (from the training set).</p>
<p>The most valuable are the patterns that demonstrate the greatest coverage. The greater the coverage, the more adequately the pattern reflects the image of the class.</p>
<p>The particular nature of the classification problem described above is in the fact that the database has a large number of unmeasured values (omitted data), whereas the measurements that have been made may be inaccurate or erroneous. It is well known that errors directly depend on measurement accuracy indicating how close the measurement results are to the actual values of the measured entities. The measurement accuracy can be increased or decreased, depending on the allocated resources (cost of measurement tools, spending on the process of measurement, stabilizing the external environment, etc.). It is understood that it must be fit for the task at hand, but not necessarily be of superior quality, because a further increase in accuracy may lead to excessive financial expenditures (Boros <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1192_ref_009">2009</xref>).</p>
<p>Sets of quantitative data can have errors in the values of quantitative attributes because of imprecise tools, imperfect measurement methods or human errors. Noise and spikes can lead to observations from different classes “overlapping” with each other and getting in the “areas” of the opposite class. Consequently, the resulting patterns have a higher degree and a much lesser coverage than they would have had without those spikes and errors, while the classifier ends up consisting of a great number of small patterns (with little coverage). This prevents one from building an effective classifier with “well-interpreted” rules involving a small number of attributes and a high degree of classification accuracy.</p>
<p>To make the method more error-resistant, it is recommended to loosen the limitation described in (<xref rid="j_info1192_eq_011">3</xref>). This will reduce the number of calculated patterns and increase their coverage.</p>
<p>The limitation of the optimization model will then look in the following way (Kuzmich and Masich, <xref ref-type="bibr" rid="j_info1192_ref_019">2014</xref>): 
<disp-formula id="j_info1192_eq_012">
<label>(4)</label><alternatives><mml:math display="block">
<mml:mtable displaystyle="true" columnalign="right">
<mml:mtr>
<mml:mtd class="align-odd">
<mml:munder>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mo largeop="true" movablelimits="false">∑</mml:mo></mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">b</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:munder>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">b</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>⩽</mml:mo>
<mml:mi mathvariant="italic">D</mml:mi>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="1em"/>
<mml:mtext>where</mml:mtext>
<mml:mspace width="2.5pt"/>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">b</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mfenced separators="" open="{" close="">
<mml:mrow>
<mml:mtable columnspacing="4.0pt" equalrows="false" columnlines="none" equalcolumns="false" columnalign="left left">
<mml:mtr>
<mml:mtd class="array">
<mml:mn>0</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="1em"/>
</mml:mtd>
<mml:mtd class="array">
<mml:mtext>if</mml:mtext>
<mml:mspace width="2.5pt"/>
<mml:msubsup>
<mml:mrow>
<mml:mo largeop="false" movablelimits="false">∑</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mfrac linethickness="0">
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">≠</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">t</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>⩾</mml:mo>
<mml:mi mathvariant="italic">d</mml:mi>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="array">
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="1em"/>
</mml:mtd>
<mml:mtd class="array">
<mml:mtext>otherwise</mml:mtext>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable>
</mml:mrow>
</mml:mfenced>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ \sum \limits_{b\in {\Omega _{s}^{-}}}{z_{b}}\leqslant D,\hspace{1em}\text{where}\hspace{2.5pt}{z_{b}}=\left\{\begin{array}{l@{\hskip4.0pt}l}0,\hspace{1em}& \text{if}\hspace{2.5pt}{\textstyle\textstyle\sum _{\genfrac{}{}{0pt}{}{j=1}{{b_{j}}\ne {a_{j}}}}^{t}}{y_{j}}\geqslant d,\\ {} 1,\hspace{1em}& \text{otherwise},\end{array}\right.\]]]></tex-math></alternatives>
</disp-formula> 
where <italic>D</italic> is the number of observations of a different class that are allowed to be covered by the pattern (a non-negative integer).</p>
<p>The functions (<xref rid="j_info1192_eq_010">2</xref>)–(<xref rid="j_info1192_eq_012">4</xref>) of the created optimization model are defined using an algorithm, i.e. they are calculated over a specific sequence of operations. The optimization problem is solved using optimization algorithms based on looking for boundary points of the permissible region (Antamoshkin and Masich, <xref ref-type="bibr" rid="j_info1192_ref_002">2006</xref>, <xref ref-type="bibr" rid="j_info1192_ref_003">2007a</xref>, <xref ref-type="bibr" rid="j_info1192_ref_004">2007b</xref>). Such algorithms were specially designed for this class of problems and are based on the behaviour of monotonic functions of the optimization model in the space of Boolean variables. The algorithms looking for boundary points are search algorithms, i.e. they do not require defining the functions explicitly, via algebraic expressions. Instead, they calculate the function outcome across a number of points.</p>
<p>According to the model <inline-formula id="j_info1192_ineq_068"><alternatives><mml:math>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>4</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[$(2,4)$]]></tex-math></alternatives></inline-formula>, the most preferable patterns are the ones with the maximum coverage. Consequently, the patterns built in this way have a low degree, i.e. they consist of a small number of terms and use only a fraction of attributes. Low-degree patterns correspond to large areas in the space of attributes. This may lead to their covering some observations from a different class (missing in the training set) and the increased number of incorrectly classified observations. This characteristic feature affects the information content of the pattern towards reducing it. Therefore, to increase the information content, the authors suggest using an algorithmic procedure for aggregating patterns. It is applied to each created pattern by driving the degree of the said patterns to a maximum level while at the same time keeping their coverage intact: 
<disp-formula id="j_info1192_eq_013">
<alternatives><mml:math display="block">
<mml:mtable displaystyle="true" columnalign="left">
<mml:mtr>
<mml:mtd class="align-odd">
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mo largeop="true" movablelimits="false">∑</mml:mo></mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">t</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">→</mml:mo>
<mml:mo movablelimits="false">max</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
<mml:mtr>
<mml:mtd class="align-odd">
<mml:mi mathvariant="italic">f</mml:mi>
<mml:mi mathvariant="italic">c</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">Y</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:mi mathvariant="italic">f</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>′</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">Y</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[\begin{array}{l}\displaystyle {\sum \limits_{j=1}^{t}}{y_{j}}\to \max ,\\ {} \displaystyle fc(Y)=f{c^{\prime }}(Y),\end{array}\]]]></tex-math></alternatives>
</disp-formula> 
where <inline-formula id="j_info1192_ineq_069"><alternatives><mml:math>
<mml:mi mathvariant="italic">f</mml:mi>
<mml:mi mathvariant="italic">c</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">Y</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[$fc(Y)$]]></tex-math></alternatives></inline-formula> is the value of the objective function (coverage) for the pattern before the aggregation procedure, <inline-formula id="j_info1192_ineq_070"><alternatives><mml:math>
<mml:mi mathvariant="italic">f</mml:mi>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>′</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">Y</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[$f{c^{\prime }}(Y)$]]></tex-math></alternatives></inline-formula> is the value of the objective function for the pattern after the aggregation procedure.</p>
<p>This way, the application of the pattern aggregation procedure can increase the information content of the patterns by reducing their coverage by the observation rules from the other class, thus driving up the accuracy of the decisions made by the classifier.</p>
<p>The next stage of this method is dedicated to solving the problem of building an adequate classifier that could classify any incoming observation, i.e. the observation that was not around when the classifier was being built.</p>
</sec>
<sec id="j_info1192_s_007">
<label>2.5</label>
<title>Building a Classifier</title>
<p>The result of the previous stage of this method is a family of maximal patterns whose number is limited by the cardinal of the data set <inline-formula id="j_info1192_ineq_071"><alternatives><mml:math>
<mml:mo stretchy="false">|</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>∪</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="false">|</mml:mo></mml:math><tex-math><![CDATA[$|{\Omega ^{+}}\cup {\Omega ^{-}}|$]]></tex-math></alternatives></inline-formula>. The classifier consists of a full set of positive and negative patterns.</p>
<p>In order to classify a new observation, let us be guided by the following decision rule (Hammer and Bonates, <xref ref-type="bibr" rid="j_info1192_ref_012">2005</xref>): 
<list>
<list-item id="j_info1192_li_003">
<label>1)</label>
<p>If the observation satisfies the conditions of one or more positive patterns and does not satisfy any of the conditions of any negative ones, it is classified as positive.</p>
</list-item>
<list-item id="j_info1192_li_004">
<label>2)</label>
<p>If the observation satisfies the conditions of one or more negative patterns and does not satisfy any of the conditions of any positive ones, it is classified as negative.</p>
</list-item>
<list-item id="j_info1192_li_005">
<label>3)</label>
<p>Choosing the voting algorithm:</p>
<list>
<list-item id="j_info1192_li_006">
<label>a)</label>
<p>Simple voting algorithm. If an observation satisfies the conditions <inline-formula id="j_info1192_ineq_072"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>′</mml:mo>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${p^{\prime }}$]]></tex-math></alternatives></inline-formula> of <italic>p</italic> positive patterns and the conditions <inline-formula id="j_info1192_ineq_073"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>′</mml:mo>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${q^{\prime }}$]]></tex-math></alternatives></inline-formula> of <italic>q</italic> negative patterns, the sign of the observation is determined as <inline-formula id="j_info1192_ineq_074"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>′</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo mathvariant="normal" stretchy="false">/</mml:mo>
<mml:mi mathvariant="italic">p</mml:mi>
<mml:mo>−</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>′</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo mathvariant="normal" stretchy="false">/</mml:mo>
<mml:mi mathvariant="italic">q</mml:mi></mml:math><tex-math><![CDATA[${p^{\prime }}/p-{q^{\prime }}/q$]]></tex-math></alternatives></inline-formula>.</p>
</list-item>
<list-item id="j_info1192_li_007">
<label>b)</label>
<p>Weighted voting algorithm. If an observation satisfies the conditions <inline-formula id="j_info1192_ineq_075"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>′</mml:mo>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${p^{\prime }}$]]></tex-math></alternatives></inline-formula> of <italic>p</italic> positive patterns and the conditions <inline-formula id="j_info1192_ineq_076"><alternatives><mml:math>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>′</mml:mo>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${q^{\prime }}$]]></tex-math></alternatives></inline-formula> of <italic>q</italic> negative patterns, the sign of the observation is determined as <inline-formula id="j_info1192_ineq_077"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mo largeop="false" movablelimits="false">∑</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">n</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">p</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>′</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>−</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mo largeop="false" movablelimits="false">∑</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">n</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="italic">q</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>′</mml:mo>
</mml:mrow>
</mml:msup>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">b</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">n</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${\textstyle\sum _{n=1}^{{p^{\prime }}}}{a_{n}}-{\textstyle\sum _{n=1}^{{q^{\prime }}}}{b_{n}}$]]></tex-math></alternatives></inline-formula>, where <italic>a</italic> and <italic>b</italic> are weighting factors for the positive and negative patterns respectively. The weight of the <italic>n</italic>-th positive pattern is calculated according to the formula: <inline-formula id="j_info1192_ineq_078"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo><mml:mstyle displaystyle="false">
<mml:mfrac>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
<mml:mrow>
<mml:msubsup>
<mml:mrow>
<mml:mo largeop="false" movablelimits="false">∑</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">n</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">p</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">n</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mstyle></mml:math><tex-math><![CDATA[${a_{n}}=\frac{{H_{n}}}{{\textstyle\sum _{n=1}^{p}}{H_{n}}}$]]></tex-math></alternatives></inline-formula>, where <italic>Hn</italic> is the information content of the <italic>n</italic>-th positive pattern calculated using the boosting criterion (6) (Kuzmich and Masich, <xref ref-type="bibr" rid="j_info1192_ref_018">2012</xref>). The cumulative weight of all positive patterns is equal to 1: <inline-formula id="j_info1192_ineq_079"><alternatives><mml:math>
<mml:msubsup>
<mml:mrow>
<mml:mo largeop="false" movablelimits="false">∑</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">n</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">p</mml:mi>
</mml:mrow>
</mml:msubsup>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">n</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn></mml:math><tex-math><![CDATA[${\textstyle\sum _{n=1}^{p}}{a_{n}}=1$]]></tex-math></alternatives></inline-formula>. Similarly, it is possible to calculate the information content and the weight of the <italic>n</italic>-th negative pattern.</p>
</list-item>
<list-item id="j_info1192_li_008">
<label>4)</label>
<p>In case the observation does not meet any conditions of any pattern, either positive or negative, it is assigned to the class that has the lowest price of error.</p>
</list-item>
</list>
</list-item>
</list>
</p>
</sec>
<sec id="j_info1192_s_008">
<label>2.6</label>
<title>Modifications to the Method of Logical Analysis of Data</title>
<p>Creating patterns and building a classifier are milestone stages of the method of logical analysis of data. The implementation of these stages is what directly determines the quality of the classification results. For that reason, the design of modifications to the method is associated with developing algorithmic procedures that address these stages.</p>
<p>So, at the pattern-creating stage, the suggested approach to defining the objective function for the optimization model is based on modifying the objective function (2) in order to emphasize the differences between the rules used in the classifier. This approach rests on the premise that the patterns to be voted should be different; otherwise they will serve no purpose for the classification.</p>
<p>According to the objective function (2), each created pattern maximizes its coverage by capturing observations typical for the corresponding class, whereas non-typical observations of the class remain uncovered, and the classifier does not comprise any patterns that take those into account. This way we obtain a set of similar patterns for the class, thus compromising the classification quality. To get a classifier with a higher distinction between the rules that allows allocating significantly different subsets of observations, the authors suggest introducing the following modification to the objective function (2) in order to identify positive patterns: 
<disp-formula id="j_info1192_eq_014">
<label>(5)</label><alternatives><mml:math display="block">
<mml:mtable displaystyle="true" columnalign="right">
<mml:mtr>
<mml:mtd class="align-odd">
<mml:munder>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mo largeop="true" movablelimits="false">∑</mml:mo></mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">c</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">S</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msubsup>
</mml:mrow>
</mml:munder>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">K</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">c</mml:mi>
</mml:mrow>
</mml:msub>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mo largeop="true" movablelimits="false">∏</mml:mo></mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mfrac linethickness="0">
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">c</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">≠</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">a</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">t</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo>−</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo stretchy="false">→</mml:mo>
<mml:mo movablelimits="false">max</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ \sum \limits_{c\in {\Omega _{S}^{+}}}{K_{c}}{\prod \limits_{\genfrac{}{}{0pt}{}{j=1}{{c_{j}}\ne {a_{j}}}}^{t}}(1-{y_{j}})\to \max ,\]]]></tex-math></alternatives>
</disp-formula> 
where <inline-formula id="j_info1192_ineq_080"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">K</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">c</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${K_{c}}$]]></tex-math></alternatives></inline-formula> is the weight of the positive observation <inline-formula id="j_info1192_ineq_081"><alternatives><mml:math>
<mml:mi mathvariant="italic">c</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msubsup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">s</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msubsup></mml:math><tex-math><![CDATA[$c\in {\Omega _{s}^{+}}$]]></tex-math></alternatives></inline-formula>, which decreases when this observation is covered, effectively lowering its participation priority in building the next pattern in favour of uncovered observations.</p>
<p>The objective function for the optimization model used to identify negative patterns is created in a similar fashion.</p>
<p>To be able to use the optimization model with the objective function (5) for building patterns, it is necessary to specify the initial weights for all observations and the rule for changing the weights of those observations that have participated in creating the current pattern. It is recommended to set the initial weights to 1 for each observation in a training set. Below is the rule for changing the weight of any observation that has already participated in creating the current pattern: 
<disp-formula id="j_info1192_eq_015">
<alternatives><mml:math display="block">
<mml:mtable displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">K</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:mo movablelimits="false">max</mml:mo>
<mml:mo maxsize="2.03em" minsize="2.03em" fence="true">[</mml:mo>
<mml:mn>0</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="0.1667em"/>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">K</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>−</mml:mo><mml:mstyle displaystyle="true">
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo movablelimits="false">max</mml:mo>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mstyle>
<mml:mo maxsize="2.03em" minsize="2.03em" fence="true">]</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ {K_{i+1}}=\max \bigg[0,\hspace{0.1667em}{K_{i}}-\frac{1}{{N_{\max }}}\bigg],\]]]></tex-math></alternatives>
</disp-formula> 
where <inline-formula id="j_info1192_ineq_082"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">K</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${K_{i}}$]]></tex-math></alternatives></inline-formula>, <inline-formula id="j_info1192_ineq_083"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">K</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${K_{i+1}}$]]></tex-math></alternatives></inline-formula> are the weights of the observation that is being covered during the creation of the current and the next patterns, <inline-formula id="j_info1192_ineq_084"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo movablelimits="false">max</mml:mo>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${N_{\max }}$]]></tex-math></alternatives></inline-formula> is a researcher-specified parameter denoting the maximum number of patterns that can cover an observation from the training set in the classifier.</p>
<p>This way, using the optimization model with the objective function (5) to build patterns, one can come up with logical rules that cover significantly different subsets of observations. Later on, those of them that yield a positive outcome of the objective function are selected and aggregated in the classifier.</p>
<p>The next stage of the method is dedicated to solving the problem of building an adequate classifier that could correctly classify any incoming observation, i.e. the observation that did not take part in the creation of the classifier.</p>
<p>In view of a potentially large volume of the data set, a question arises as to the need to reduce the number of patterns, since this quantity in the original classifier is equal to the cardinality of the training data set <inline-formula id="j_info1192_ineq_085"><alternatives><mml:math>
<mml:mo stretchy="false">|</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>+</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo>∪</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mi mathvariant="normal">Ω</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mo>−</mml:mo>
</mml:mrow>
</mml:msup>
<mml:mo stretchy="false">|</mml:mo></mml:math><tex-math><![CDATA[$|{\Omega ^{+}}\cup {\Omega ^{-}}|$]]></tex-math></alternatives></inline-formula>. In short, it is necessary to define a classifier consisting of a certain number of patterns in such a way that it would be capable of classifying the same observations that are possible to classify using a complete system of patterns.</p>
<p>This study offers the following algorithmic procedures for reducing the number of patterns in the original classifier:</p>
<list>
<list-item id="j_info1192_li_009">
<label>–</label>
<p>selecting baseline observations for building patterns (Kuzmich and Masich, <xref ref-type="bibr" rid="j_info1192_ref_019">2014</xref>);</p>
</list-item>
<list-item id="j_info1192_li_010">
<label>–</label>
<p>building a classifier as a composition of informative patterns (Kuzmich and Masich, <xref ref-type="bibr" rid="j_info1192_ref_018">2012</xref>).</p>
</list-item>
</list>
<p>The implementation of the algorithmic procedure of selecting baseline observations for building patterns involves completing a series of consecutive steps. First, based on the observations from the training set, one needs to derive centroids for each class by using the <italic>k</italic>-means algorithm. According to the <italic>k</italic>-means clustering algorithm, each observation from the training set has to be put into one of the <italic>k</italic>-clusters so that each cluster is represented by the centroid of the corresponding observations, whereby the distance from each observation to the centroid of its cluster is shorter than the distance to the centroid of any other cluster. This algorithm makes it possible to pick a range of centroids that most accurately represents the distribution of observations in the training set.</p>
<p>The algorithm comprises the following steps described in Bagirov (<xref ref-type="bibr" rid="j_info1192_ref_006">2011</xref>):</p>
<p><bold>Step 1.</bold> Pick <italic>k</italic> initial centroids <inline-formula id="j_info1192_ineq_086"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mo>…</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">k</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[${z_{1}}(1),{z_{2}}(2),\dots ,{z_{k}}(l)$]]></tex-math></alternatives></inline-formula>. The initial centroids are selected arbitrarily, e.g. the first <italic>k</italic> observations from the training set.</p>
<p><bold>Step</bold> <inline-formula id="j_info1192_ineq_087"><alternatives><mml:math>
<mml:mi mathvariant="bold-italic">l</mml:mi></mml:math><tex-math><![CDATA[$\boldsymbol{l}$]]></tex-math></alternatives></inline-formula><bold>.</bold> At the <italic>l</italic>-th step of the iteration, distribute the set of observations <inline-formula id="j_info1192_ineq_088"><alternatives><mml:math>
<mml:mi mathvariant="italic">X</mml:mi>
<mml:mo>=</mml:mo>
<mml:mo fence="true" stretchy="false">{</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mo>…</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">x</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">m</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo fence="true" stretchy="false">}</mml:mo></mml:math><tex-math><![CDATA[$X=\{{x_{1}},{x_{2}},\dots ,{x_{m}}\}$]]></tex-math></alternatives></inline-formula> among <italic>k</italic> clusters according to the following rule: 
<disp-formula id="j_info1192_eq_016">
<alternatives><mml:math display="block">
<mml:mtable displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="1em"/>
<mml:mtext>if</mml:mtext>
<mml:mspace width="2.5pt"/>
<mml:mo maxsize="1.19em" minsize="1.19em" stretchy="true">‖</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo>−</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo maxsize="1.19em" minsize="1.19em" stretchy="true">‖</mml:mo>
<mml:mo mathvariant="normal">&lt;</mml:mo>
<mml:mo maxsize="1.19em" minsize="1.19em" stretchy="true">‖</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo>−</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo maxsize="1.19em" minsize="1.19em" stretchy="true">‖</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ x\in {T_{j}}(l),\hspace{1em}\text{if}\hspace{2.5pt}\big\| x-{z_{j}}(l)\big\| <\big\| x-{z_{i}}(l)\big\| \]]]></tex-math></alternatives>
</disp-formula> 
for every <inline-formula id="j_info1192_ineq_089"><alternatives><mml:math>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mo>…</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mi mathvariant="italic">k</mml:mi></mml:math><tex-math><![CDATA[$i=1,2,\dots ,k$]]></tex-math></alternatives></inline-formula>, <inline-formula id="j_info1192_ineq_090"><alternatives><mml:math>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo stretchy="false">≠</mml:mo>
<mml:mi mathvariant="italic">j</mml:mi></mml:math><tex-math><![CDATA[$i\ne j$]]></tex-math></alternatives></inline-formula>, where <inline-formula id="j_info1192_ineq_091"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[${T_{j}}(l)$]]></tex-math></alternatives></inline-formula> is the set of observations belonging to the cluster with the centroid <inline-formula id="j_info1192_ineq_092"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[${z_{j}}(l)$]]></tex-math></alternatives></inline-formula>. In case of equality, the decision is made in an arbitrary way.</p>
<p><bold>Step</bold> <inline-formula id="j_info1192_ineq_093"><alternatives><mml:math>
<mml:mi mathvariant="bold-italic">l</mml:mi>
<mml:mo mathvariant="bold">+</mml:mo>
<mml:mn mathvariant="bold">1</mml:mn></mml:math><tex-math><![CDATA[$\boldsymbol{l}\boldsymbol{+}\mathbf{1}$]]></tex-math></alternatives></inline-formula><bold>.</bold> Based on the results of step <italic>l</italic>, new centroids of clusters <inline-formula id="j_info1192_ineq_094"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[${z_{j}}(l+1)$]]></tex-math></alternatives></inline-formula>, <inline-formula id="j_info1192_ineq_095"><alternatives><mml:math>
<mml:mi mathvariant="italic">j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mo>…</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mi mathvariant="italic">k</mml:mi></mml:math><tex-math><![CDATA[$j=1,2,\dots ,k$]]></tex-math></alternatives></inline-formula> are derived, on the assumption that the sum of squared distances between all observations belonging to the set <inline-formula id="j_info1192_ineq_096"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[${T_{j}}(l)$]]></tex-math></alternatives></inline-formula> and the new centroid of this cluster must be minimal.</p>
<p>The centroid <inline-formula id="j_info1192_ineq_097"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">y</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[${y_{j}}(l+1)$]]></tex-math></alternatives></inline-formula> ensuring the minimization <inline-formula id="j_info1192_ineq_098"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">J</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mo largeop="false" movablelimits="false">∑</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mrow>
</mml:msub>
<mml:mo stretchy="false">‖</mml:mo>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo>−</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:msup>
<mml:mrow>
<mml:mo stretchy="false">‖</mml:mo>
</mml:mrow>
<mml:mrow>
<mml:mn>2</mml:mn>
</mml:mrow>
</mml:msup></mml:math><tex-math><![CDATA[${J_{j}}={\textstyle\sum _{x\in {T_{j}}(l)}}\| x-{z_{j}}(l+1){\| ^{2}}$]]></tex-math></alternatives></inline-formula>, <inline-formula id="j_info1192_ineq_099"><alternatives><mml:math>
<mml:mi mathvariant="italic">j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mo>…</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mi mathvariant="italic">k</mml:mi></mml:math><tex-math><![CDATA[$j=1,2,\dots ,k$]]></tex-math></alternatives></inline-formula> is a sample average calculated across the set <inline-formula id="j_info1192_ineq_100"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[${T_{j}}(l)$]]></tex-math></alternatives></inline-formula>. Therefore, the new cluster centroids are defined as: 
<disp-formula id="j_info1192_eq_017">
<alternatives><mml:math display="block">
<mml:mtable displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo><mml:mstyle displaystyle="true">
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
</mml:mrow>
</mml:mfrac>
</mml:mstyle>
<mml:munder>
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mo largeop="true" movablelimits="false">∑</mml:mo></mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo stretchy="false">∈</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
</mml:mrow>
</mml:munder>
<mml:mi mathvariant="italic">x</mml:mi>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mspace width="1em"/>
<mml:mi mathvariant="italic">j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mo>…</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mi mathvariant="italic">k</mml:mi>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ {z_{j}}(l+1)=\frac{1}{{N_{j}}}\sum \limits_{x\in {T_{j}}(l)}x,\hspace{1em}j=1,2,\dots ,k,\]]]></tex-math></alternatives>
</disp-formula> 
where <inline-formula id="j_info1192_ineq_101"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">N</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${N_{j}}$]]></tex-math></alternatives></inline-formula> is the number of sample observations included into the set <inline-formula id="j_info1192_ineq_102"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[${T_{j}}(l)$]]></tex-math></alternatives></inline-formula>. Apparently, the choice of the <italic>k</italic>-means algorithm is due to the established way of sequential correction of the calculated cluster centroids.</p>
<p>The equation <inline-formula id="j_info1192_ineq_103"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo>+</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">z</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[${z_{j}}(l+1)={z_{j}}(l)$]]></tex-math></alternatives></inline-formula>, given <inline-formula id="j_info1192_ineq_104"><alternatives><mml:math>
<mml:mi mathvariant="italic">j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mo>…</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mi mathvariant="italic">k</mml:mi></mml:math><tex-math><![CDATA[$j=1,2,\dots ,k$]]></tex-math></alternatives></inline-formula>, is the condition for the convergence of this algorithm, and upon its achievement the execution of the algorithm stops. The resulting sets <inline-formula id="j_info1192_ineq_105"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">T</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">j</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">l</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[${T_{j}}(l)$]]></tex-math></alternatives></inline-formula>, <inline-formula id="j_info1192_ineq_106"><alternatives><mml:math>
<mml:mi mathvariant="italic">j</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mo>…</mml:mo>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mi mathvariant="italic">k</mml:mi></mml:math><tex-math><![CDATA[$j=1,2,\dots ,k$]]></tex-math></alternatives></inline-formula> will be the sought-for clusters. If this is not the case, the last step is repeated.</p>
<p>This algorithm is used to partition the observations of the training set of each class into clusters. It produces a separate set of centroids for each class.</p>
<p>Second, one needs to add the resulting sets of centroids to the observations in the training set. Third, the centroids are used as baseline observations for building patterns.</p>
<p>This way, by implementing the heuristic procedure described above, we get a new classifier consisting of a lesser number of patterns. The number of patterns in the classifier will be equal to the cumulative number of centroids obtained for each class. Clearly, the classification accuracy depends on the number of centroids for each class, therefore one needs to conduct multiple experiments with sets of centroids of diverse quantity in order to establish how the classification accuracy depends on the number of centroids for each class.</p>
<p>The procedure of selecting baseline observations for building patterns must be implemented prior to creating the classifier, effectively simplifying its creation due to the significant reduction of the number of patterns to be built; however, this will normally slightly degrade the classification accuracy. To mitigate this shortcoming, another approach can be used to reduce the number of patterns in the original classifier. It is necessary to build a classifier whose number of patterns is equal to the cardinality of the training data set, and to reduce this number of patterns while retaining the high accuracy of classification. This approach can be implemented through the suggested procedure of building a classifier as a composition of informative patterns, which is based on the concept of their information content.</p>
<p>There are several criteria for measuring the information content of a pattern offered in the discipline-specific literature. This study recommends using the boosting criterion, since it adequately assesses the information content of a pattern and is fairly simple to calculate: 
<disp-formula id="j_info1192_eq_018">
<label>(6)</label><alternatives><mml:math display="block">
<mml:mtable displaystyle="true" columnalign="right">
<mml:mtr>
<mml:mtd class="align-odd">
<mml:mi mathvariant="italic">H</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mi mathvariant="italic">p</mml:mi>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mi mathvariant="italic">n</mml:mi>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo>
<mml:mo>=</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:mi mathvariant="italic">p</mml:mi>
</mml:mrow>
</mml:msqrt>
<mml:mo>−</mml:mo>
<mml:msqrt>
<mml:mrow>
<mml:mi mathvariant="italic">n</mml:mi>
</mml:mrow>
</mml:msqrt>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ H(p,n)=\sqrt{p}-\sqrt{n},\]]]></tex-math></alternatives>
</disp-formula> 
where <italic>p</italic> is the number of observations of own class captured by the created pattern; <italic>n</italic> is the number of observations from other classes captured by the created pattern.</p>
<p>Initially, the classifier includes all patterns that are built against each observation in the training set. Consequently, as the volume of the training set increases, so does the size of the set of rules for the classifier. Notably, the created patterns are characterized by different information content. The patterns covering a small number of observations are statistically unreliable – they include too many patterns that make more mistakes with independent support data than with a training set. For that reason, it is recommended to only include informative patterns into the classifier, i.e. their information content must exceed a certain information threshold (<inline-formula id="j_info1192_ineq_107"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${H_{0}}$]]></tex-math></alternatives></inline-formula>) specified by the researcher. This will help to reduce the number of patterns in the classifier without compromising the classification accuracy or with only slight changes towards its improvement/deterioration.</p>
<p>The solving of this problem raises the issue of choosing the information threshold. This study addresses this issue through designing the following iterative procedure. The first step of this procedure suggests setting the information threshold to 0 for both positive and negative sets of patterns, thus resulting in the original classifier consisting of the maximum number of patterns possible. At the second step of this procedure, it is necessary to set the information threshold for negative (positive) patterns, which should be equal to the average information content (<inline-formula id="j_info1192_ineq_108"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">avg</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${H_{\mathit{avg}}}$]]></tex-math></alternatives></inline-formula>) across all negative (positive) patterns: 
<disp-formula id="j_info1192_eq_019">
<alternatives><mml:math display="block">
<mml:mtable displaystyle="true">
<mml:mtr>
<mml:mtd>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">avg</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo>=</mml:mo><mml:mstyle displaystyle="true">
<mml:mfrac>
<mml:mrow>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">q</mml:mi>
</mml:mrow>
</mml:mfrac>
</mml:mstyle>
<mml:munderover accentunder="false" accent="false">
<mml:mrow>
<mml:mstyle displaystyle="true">
<mml:mo largeop="true" movablelimits="false">∑</mml:mo></mml:mstyle>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
<mml:mo>=</mml:mo>
<mml:mn>1</mml:mn>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">q</mml:mi>
</mml:mrow>
</mml:munderover>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub>
<mml:mo mathvariant="normal">,</mml:mo>
</mml:mtd>
</mml:mtr>
</mml:mtable></mml:math><tex-math><![CDATA[\[ {H_{\mathit{avg}}}=\frac{1}{q}{\sum \limits_{i=1}^{q}}{H_{i}},\]]]></tex-math></alternatives>
</disp-formula> 
where <italic>q</italic> is the number of negative (positive) patterns in the classifier, <inline-formula id="j_info1192_ineq_109"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">i</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${H_{i}}$]]></tex-math></alternatives></inline-formula> is the information content of the <italic>i</italic>-th negative (positive) pattern calculated using the formula (<xref rid="j_info1192_eq_018">6</xref>).</p>
<p>To get a new classifier consisting of patterns with greater information content, we will remove from the original classifier all negative (positive) patterns whose information content is below the information threshold derived for them. Having calculated the values of the average information content for negative and positive patterns of the current classifier, we will use them to build the next classifier that will consist of patterns whose information content is higher than the values of the average information content for the current classifier. This way we will build each successive classifier, each time utilizing the average information content of the present one. This reduces the number of patterns and increases the average information content for each successive classifier. The procedure should stop as soon as the number of unclassified (uncovered) observations has increased during the classification process, i.e. the patterns included in the current classifier fail to cover certain observations belonging to the test sample. In this case, it is necessary to either get back to the previous classifier and revert the two information thresholds to their previous values, or change the value of only one information threshold for negative (positive) patterns and register how this amendment will affect the number of unclassified observations and the classification results in general.</p>
<p>Based on the designed algorithmic procedures, the authors suggest the following modifications to the method of logical analysis of data in order to improve the generalization capability of the classifier and make it more interpretable by reducing the number of rules it uses:</p>
<list>
<list-item id="j_info1192_li_011">
<label>–</label>
<p>using the objective function (5) and the constraint function (4) to create patterns and build the classifier exclusively on the rules that yield a positive (greater than zero) outcome of the objective function;</p>
</list-item>
<list-item id="j_info1192_li_012">
<label>–</label>
<p>using the algorithmic procedure for selecting baseline observations to create patterns and applying the aggregation procedure to the resulting rules;</p>
</list-item>
<list-item id="j_info1192_li_013">
<label>–</label>
<p>applying the algorithmic procedure of building a classifier as a composition of informative patterns based on the optimization model <inline-formula id="j_info1192_ineq_110"><alternatives><mml:math>
<mml:mo mathvariant="normal" fence="true" stretchy="false">(</mml:mo>
<mml:mn>2</mml:mn>
<mml:mo mathvariant="normal">,</mml:mo>
<mml:mn>4</mml:mn>
<mml:mo mathvariant="normal" fence="true" stretchy="false">)</mml:mo></mml:math><tex-math><![CDATA[$(2,4)$]]></tex-math></alternatives></inline-formula> coupled with the aggregation procedure.</p>
</list-item>
</list>
<p>The suggested modifications to the method of logical analysis of data can help improve the quality of the classification of new observations.</p>
</sec>
</sec>
<sec id="j_info1192_s_009">
<label>3</label>
<title>Obtained Results</title>
<p>The method of logical analysis of data is implemented in a software system that made it possible to solve the following classification problems taken from the UCI Machine Learning Repository: SPAM detection, classification of the results of radar scans of the ionosphere. The problem of predicting complications of myocardial infarction (MI) is also considered. To solve this problem, the staff of the Chair of internal diseases No. 1 of the Krasnoyarsk State Medical Academy collected the information on the course of a disease of 1700 patients with the MI undergoing the treatment in 1989–1995 at the Cardiological center of a Municipal Hospital No. 20 of Krasnoyarsk. Information is obtained from case histories of patients. Each observation (patient) was characterized by a vector of 112 characteristics (Golovenkin <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1192_ref_011">1997</xref>). The characteristics are binary (majority) rated and numerical values. There is a considerable number of missing data in this data sample. Among the chosen complications, there exist fibrillation of auricles (FA), fibrillation of ventricles (FV), fluid lungs (FL), cardiorrhexis (CR), and also lethal outcome (LO).</p>
<p>Earlier, the problem of prediction of the MI complications was solved by means of neural networks (Golovenkin <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1192_ref_011">1997</xref>). During its solution it was noted that the classifier yields poor results when there is a substantial difference in the number of observations of each class in the initial data sample. Therefore, the following approach to the solution of this problem was offered. The number of patients with some complication (positive observations) is approximately ten times smaller than the number of patients in whom this complication was not observed (the negative observations). The initial data sample (1700 observations) is divided into test data sample and 10 training data samples for every complication. The positive observations in the training data samples remain the same and the negative observations differ. The method is trained on each of training data samples separately but it is tested on the common examining data sample. Finally, the solution on each observation of the examining data sample is made by a majority of votes of all classifiers obtained on the basis of 10 training data samples. When using this approach for the solution of our problem, besides improving the classification, we have an opportunity to compare the classification results of the method of logical data analysis and of neural networks. The number of patients with complications and without complications of each of 10 selections and the size of test data sample for all considered complications are presented in Table <xref rid="j_info1192_tab_001">1</xref>.</p>
<table-wrap id="j_info1192_tab_001">
<label>Table 1</label>
<caption>
<p>Structure of data samples of all MI complications.</p>
</caption>
<table>
<thead>
<tr>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin"/>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">FA</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">FV</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">FL</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">CR</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">LO</td>
</tr>
</thead>
<tbody>
<tr>
<td style="vertical-align: top; text-align: left">Number of positive observations</td>
<td style="vertical-align: top; text-align: left">70</td>
<td style="vertical-align: top; text-align: left">170</td>
<td style="vertical-align: top; text-align: left">159</td>
<td style="vertical-align: top; text-align: left">54</td>
<td style="vertical-align: top; text-align: left">160</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Number of negative observations</td>
<td style="vertical-align: top; text-align: left">181</td>
<td style="vertical-align: top; text-align: left">180</td>
<td style="vertical-align: top; text-align: left">173</td>
<td style="vertical-align: top; text-align: left">179</td>
<td style="vertical-align: top; text-align: left">172</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Number of observations in the examining data sample</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">30</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">50</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">39</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">28</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">50</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>The rules for each problem were being derived using four optimization models: the “strict” model disallowing the created rules to cover observations from a different class; the modified model allowing the rules to cover a certain limited number of observations from a different class; the modified model with a pattern aggregation procedure; the model for creating patterns covering significantly different subsets of observations from the training set.</p>
<table-wrap id="j_info1192_tab_002">
<label>Table 2</label>
<caption>
<p>Classification results for the problem of SPAM detection.</p>
</caption>
<table>
<thead>
<tr>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Optimization problem</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Set of rules</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Num. of rules</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Coverage of negative observations</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Coverage of positive observations</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Degree of the rule</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Classification accuracy, %</td>
</tr>
</thead>
<tbody>
<tr>
<td rowspan="2" style="vertical-align: middle; text-align: left">Objective function (2), constraint function (3)</td>
<td style="vertical-align: top; text-align: left">neg.</td>
<td style="vertical-align: top; text-align: left">234</td>
<td style="vertical-align: top; text-align: left">49</td>
<td style="vertical-align: top; text-align: left">0</td>
<td style="vertical-align: top; text-align: left">4</td>
<td style="vertical-align: top; text-align: left">98</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">pos.</td>
<td style="vertical-align: top; text-align: left">134</td>
<td style="vertical-align: top; text-align: left">0</td>
<td style="vertical-align: top; text-align: left">29</td>
<td style="vertical-align: top; text-align: left">4</td>
<td style="vertical-align: top; text-align: left">68</td>
</tr>
<tr>
<td rowspan="2" style="vertical-align: middle; text-align: left">Objective function (2), constraint function (4)</td>
<td style="vertical-align: top; text-align: left">neg.</td>
<td style="vertical-align: top; text-align: left">234</td>
<td style="vertical-align: top; text-align: left">96</td>
<td style="vertical-align: top; text-align: left">5</td>
<td style="vertical-align: top; text-align: left">5</td>
<td style="vertical-align: top; text-align: left">98</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">pos.</td>
<td style="vertical-align: top; text-align: left">134</td>
<td style="vertical-align: top; text-align: left">5</td>
<td style="vertical-align: top; text-align: left">50</td>
<td style="vertical-align: top; text-align: left">4</td>
<td style="vertical-align: top; text-align: left">79</td>
</tr>
<tr>
<td rowspan="2" style="vertical-align: middle; text-align: left">Objective function (2), constraint function (4) with the application of the augmentation procedure</td>
<td style="vertical-align: top; text-align: left">neg.</td>
<td style="vertical-align: top; text-align: left">234</td>
<td style="vertical-align: top; text-align: left">96</td>
<td style="vertical-align: top; text-align: left">4</td>
<td style="vertical-align: top; text-align: left">7</td>
<td style="vertical-align: top; text-align: left">98</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">pos.</td>
<td style="vertical-align: top; text-align: left">134</td>
<td style="vertical-align: top; text-align: left">4</td>
<td style="vertical-align: top; text-align: left">50</td>
<td style="vertical-align: top; text-align: left">5</td>
<td style="vertical-align: top; text-align: left">87</td>
</tr>
<tr>
<td rowspan="2" style="vertical-align: middle; text-align: left; border-bottom: solid thin">Objective function (5), constraint function (4)</td>
<td style="vertical-align: top; text-align: left">neg.</td>
<td style="vertical-align: top; text-align: left">49</td>
<td style="vertical-align: top; text-align: left">69</td>
<td style="vertical-align: top; text-align: left">5</td>
<td style="vertical-align: top; text-align: left">4</td>
<td style="vertical-align: top; text-align: left">96</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">pos.</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">59</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">5</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">31</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">4</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">72</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Table <xref rid="j_info1192_tab_002">2</xref> shows the classification results for one of the aforementioned problems – the SPAM detection. The test was run against 279 negative (non-SPAM) and 181 positive observations (SPAM), with 20% of the set being used in the test. Overall, 20 experiments have been conducted, with their results averaged out.</p>
<p>By applying the pattern aggregation procedure, it is possible to obtain higher-degree patterns with the maximal coverage, which helps to increase the reliability of the decisions made by the classifier. The modification to the method of logical analysis of data involving the application of the objective function (5) allows simplifying the classifier by significantly reducing the number of its patterns.</p>
<p>Let us conduct the check of the procedure for selecting baseline observations for creating patterns. The solution to the problem of classifying the results of a radar scan of the ionosphere requires generating 15 centroids for each class using the <italic>k</italic>-means clustering algorithm run within the WEKA software. The generated centroids are then added to the original training set, and patterns are built upon them. Ultimately, within the scope of this problem, the test is carried out on just 20% of the set consisting of 240 positive and 141 negative observations. The corresponding classification results are given in Table <xref rid="j_info1192_tab_003">3</xref>.</p>
<table-wrap id="j_info1192_tab_003">
<label>Table 3</label>
<caption>
<p>Accuracy of the solutions to the problem of classifying the results of the ionosphere radar scan.</p>
</caption>
<table>
<thead>
<tr>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Set of rules</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Coverage of neg. observations in the new/original classifier</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Coverage of pos. observations in the new/original classifier</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Degree of the rule in the new/original classifier</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Number of rules in the new/original classifier</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Accuracy of the new classifier, %</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Accuracy of the original classifier, %</td>
</tr>
</thead>
<tbody>
<tr>
<td style="vertical-align: top; text-align: left">Neg.</td>
<td style="vertical-align: top; text-align: left">45 / 36</td>
<td style="vertical-align: top; text-align: left">15 / 15</td>
<td style="vertical-align: top; text-align: left">2 / 2</td>
<td style="vertical-align: top; text-align: left">15 / 95</td>
<td style="vertical-align: top; text-align: left">74</td>
<td style="vertical-align: top; text-align: left">68</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">Pos.</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">15 / 15</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">139 / 130</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">3 / 3</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">15 / 186</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">96</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">98</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>According to the results (see Table <xref rid="j_info1192_tab_003">3</xref>), we have achieved a slight change in the classification accuracy for the problem at hand and a 9-fold decrease in the number of rules used by the classifier.</p>
<p>Let us conduct the check of the algorithmic procedure for building a classifier as a composition of informative patterns as applied to the problem of SPAM detection. Only 20% of the set are used for this test. The classification results are given in Table <xref rid="j_info1192_tab_004">4</xref>. For each experiment presented in Table <xref rid="j_info1192_tab_004">4</xref>, the researcher only specifies the information threshold. In the first experiment, the information thresholds are set to 0 for each class. In all subsequent experiments, they are equal to the average information content calculated under the previous experiment. Upon the occurrence of uncovered observations, the value of the information content is amended for one class only.</p>
<p>According to the obtained results (see Table <xref rid="j_info1192_tab_004">4</xref>), it is possible to conclude that the method modification associated with this procedure allows simplifying the classifier, since the number of rules it is comprised of decreases 4-fold with respect to the full set of rules for this problem. This, however, does not compromise the accuracy of the classification or does so to a negligible extent.</p>
<table-wrap id="j_info1192_tab_004">
<label>Table 4</label>
<caption>
<p>Classification results for the problem of SPAM detection following the change in the value of the information threshold, <inline-formula id="j_info1192_ineq_111"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${H_{0}}$]]></tex-math></alternatives></inline-formula>.</p>
</caption>
<table>
<thead>
<tr>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">No. of experiment</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Set of rules</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Number of rules</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Average information content, <inline-formula id="j_info1192_ineq_113"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mi mathvariant="italic">avg</mml:mi>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${H_{\mathit{avg}}}$]]></tex-math></alternatives></inline-formula></td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Information threshold, <inline-formula id="j_info1192_ineq_114"><alternatives><mml:math>
<mml:msub>
<mml:mrow>
<mml:mi mathvariant="italic">H</mml:mi>
</mml:mrow>
<mml:mrow>
<mml:mn>0</mml:mn>
</mml:mrow>
</mml:msub></mml:math><tex-math><![CDATA[${H_{0}}$]]></tex-math></alternatives></inline-formula></td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Coverage of negative observations</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Coverage of positive observations</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Number of uncovered observations</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Classification accuracy, %</td>
</tr>
</thead>
<tbody>
<tr>
<td style="vertical-align: top; text-align: left">1</td>
<td style="vertical-align: top; text-align: left">neg.</td>
<td style="vertical-align: top; text-align: left">234</td>
<td style="vertical-align: top; text-align: left">7.84</td>
<td style="vertical-align: top; text-align: left">0</td>
<td style="vertical-align: top; text-align: left">120</td>
<td style="vertical-align: top; text-align: left">10</td>
<td style="vertical-align: top; text-align: left">0</td>
<td style="vertical-align: top; text-align: left">96</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left"/>
<td style="vertical-align: top; text-align: left">pos.</td>
<td style="vertical-align: top; text-align: left">134</td>
<td style="vertical-align: top; text-align: left">4.49</td>
<td style="vertical-align: top; text-align: left">0</td>
<td style="vertical-align: top; text-align: left">10</td>
<td style="vertical-align: top; text-align: left">57</td>
<td style="vertical-align: top; text-align: left">0</td>
<td style="vertical-align: top; text-align: left">89</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">2</td>
<td style="vertical-align: top; text-align: left">neg.</td>
<td style="vertical-align: top; text-align: left">132</td>
<td style="vertical-align: top; text-align: left">8.51</td>
<td style="vertical-align: top; text-align: left">7.84</td>
<td style="vertical-align: top; text-align: left">134</td>
<td style="vertical-align: top; text-align: left">10</td>
<td style="vertical-align: top; text-align: left">0</td>
<td style="vertical-align: top; text-align: left">93</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left"/>
<td style="vertical-align: top; text-align: left">pos.</td>
<td style="vertical-align: top; text-align: left">79</td>
<td style="vertical-align: top; text-align: left">5.49</td>
<td style="vertical-align: top; text-align: left">4.49</td>
<td style="vertical-align: top; text-align: left">10</td>
<td style="vertical-align: top; text-align: left">70</td>
<td style="vertical-align: top; text-align: left">0</td>
<td style="vertical-align: top; text-align: left">85</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">3</td>
<td style="vertical-align: top; text-align: left">neg.</td>
<td style="vertical-align: top; text-align: left">68</td>
<td style="vertical-align: top; text-align: left">8.85</td>
<td style="vertical-align: top; text-align: left">8.51</td>
<td style="vertical-align: top; text-align: left">141</td>
<td style="vertical-align: top; text-align: left">10</td>
<td style="vertical-align: top; text-align: left">1</td>
<td style="vertical-align: top; text-align: left">87</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left"/>
<td style="vertical-align: top; text-align: left">pos.</td>
<td style="vertical-align: top; text-align: left">39</td>
<td style="vertical-align: top; text-align: left">6.05</td>
<td style="vertical-align: top; text-align: left">5.49</td>
<td style="vertical-align: top; text-align: left">10</td>
<td style="vertical-align: top; text-align: left">77</td>
<td style="vertical-align: top; text-align: left">1</td>
<td style="vertical-align: top; text-align: left">79</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">4</td>
<td style="vertical-align: top; text-align: left">neg.</td>
<td style="vertical-align: top; text-align: left">68</td>
<td style="vertical-align: top; text-align: left">8.85</td>
<td style="vertical-align: top; text-align: left">8.51</td>
<td style="vertical-align: top; text-align: left">141</td>
<td style="vertical-align: top; text-align: left">10</td>
<td style="vertical-align: top; text-align: left">0</td>
<td style="vertical-align: top; text-align: left">98</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left"/>
<td style="vertical-align: top; text-align: left">pos.</td>
<td style="vertical-align: top; text-align: left">79</td>
<td style="vertical-align: top; text-align: left">5.49</td>
<td style="vertical-align: top; text-align: left">4.49</td>
<td style="vertical-align: top; text-align: left">10</td>
<td style="vertical-align: top; text-align: left">70</td>
<td style="vertical-align: top; text-align: left">0</td>
<td style="vertical-align: top; text-align: left">87</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">5</td>
<td style="vertical-align: top; text-align: left">neg.</td>
<td style="vertical-align: top; text-align: left">34</td>
<td style="vertical-align: top; text-align: left">9.03</td>
<td style="vertical-align: top; text-align: left">8.85</td>
<td style="vertical-align: top; text-align: left">146</td>
<td style="vertical-align: top; text-align: left">10</td>
<td style="vertical-align: top; text-align: left">0</td>
<td style="vertical-align: top; text-align: left">96</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin"/>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">pos.</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">79</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">5.49</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">4.49</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">10</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">70</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">0</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">89</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Table <xref rid="j_info1192_tab_005">5</xref> provides the comparison of the accuracy of classification results for 6 machine-learning algorithms (1-R, Barsegyan <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1192_ref_007">2004</xref>, RIPPER, Vijayarani and Divya, <xref ref-type="bibr" rid="j_info1192_ref_025">2011</xref>, CART, Shi <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1192_ref_022">2016</xref>, C4.5, Vijayarani and Divya, <xref ref-type="bibr" rid="j_info1192_ref_025">2011</xref>, Random Forest, Provost <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1192_ref_020">2016</xref>, Adaboost, Sun <italic>et al.</italic>, <xref ref-type="bibr" rid="j_info1192_ref_024">2016</xref>) obtained in the WEKA (Weka 3, 2015) data analysis system, with the accuracy of the results obtained using the method of logical analysis of data (LAD) that the authors designed. The data sets for each problem are randomly divided into a training set (80%) and a test set (20%) for SPAM detection and classification of radar scan results of the ionosphere. Twenty experiments have been conducted for each method, with their results averaged out. For the problem of predicting the MI complications, the sample size used for testing for each complication was determined according to Table <xref rid="j_info1192_tab_001">1</xref>.</p>
<table-wrap id="j_info1192_tab_005">
<label>Table 5</label>
<caption>
<p>Comparison of classification algorithms.</p>
</caption>
<table>
<thead>
<tr>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Problem</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Algorithm measure</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">1-R</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">RIP-PER</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">CART</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">C4.5</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Random forest</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Adaboost</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">LAD</td>
</tr>
</thead>
<tbody>
<tr>
<td style="vertical-align: top; text-align: left">SPAM detection</td>
<td style="vertical-align: top; text-align: left">The number of correctly identified observations, %</td>
<td style="vertical-align: top; text-align: left">82.6</td>
<td style="vertical-align: top; text-align: left">91.3</td>
<td style="vertical-align: top; text-align: left">90.2</td>
<td style="vertical-align: top; text-align: left">90.2</td>
<td style="vertical-align: top; text-align: left">89.1</td>
<td style="vertical-align: top; text-align: left">91.3</td>
<td style="vertical-align: top; text-align: left">92.4</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Radar scan of the ionosphere</td>
<td style="vertical-align: top; text-align: left">The number of correctly identified observations, %</td>
<td style="vertical-align: top; text-align: left">78.6</td>
<td style="vertical-align: top; text-align: left">82.8</td>
<td style="vertical-align: top; text-align: left">82.8</td>
<td style="vertical-align: top; text-align: left">81.4</td>
<td style="vertical-align: top; text-align: left">84.2</td>
<td style="vertical-align: top; text-align: left">88.5</td>
<td style="vertical-align: top; text-align: left">90</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">FA</td>
<td style="vertical-align: top; text-align: left">The number of correctly identified observations, %</td>
<td style="vertical-align: top; text-align: left">58</td>
<td style="vertical-align: top; text-align: left">66</td>
<td style="vertical-align: top; text-align: left">62</td>
<td style="vertical-align: top; text-align: left">70</td>
<td style="vertical-align: top; text-align: left">70</td>
<td style="vertical-align: top; text-align: left">74</td>
<td style="vertical-align: top; text-align: left">76</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">FV</td>
<td style="vertical-align: top; text-align: left">The number of correctly identified observations, %</td>
<td style="vertical-align: top; text-align: left">87.3</td>
<td style="vertical-align: top; text-align: left">86.7</td>
<td style="vertical-align: top; text-align: left">63.3</td>
<td style="vertical-align: top; text-align: left">83.3</td>
<td style="vertical-align: top; text-align: left">68.3</td>
<td style="vertical-align: top; text-align: left">89</td>
<td style="vertical-align: top; text-align: left">90</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">FL</td>
<td style="vertical-align: top; text-align: left">The number of correctly identified observations, %</td>
<td style="vertical-align: top; text-align: left">85.7</td>
<td style="vertical-align: top; text-align: left">78.6</td>
<td style="vertical-align: top; text-align: left">85.7</td>
<td style="vertical-align: top; text-align: left">85.7</td>
<td style="vertical-align: top; text-align: left">71.4</td>
<td style="vertical-align: top; text-align: left">89.3</td>
<td style="vertical-align: top; text-align: left">96.4</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">CR</td>
<td style="vertical-align: top; text-align: left">The number of correctly identified observations, %</td>
<td style="vertical-align: top; text-align: left">69.2</td>
<td style="vertical-align: top; text-align: left">69.2</td>
<td style="vertical-align: top; text-align: left">71.8</td>
<td style="vertical-align: top; text-align: left">76.9</td>
<td style="vertical-align: top; text-align: left">66.7</td>
<td style="vertical-align: top; text-align: left">69.7</td>
<td style="vertical-align: top; text-align: left">79.5</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">LO</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">The number of correctly identified observations, %</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">64</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">74</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">74</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">66</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">76</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">74</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">86</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>Since the point estimates of the classification accuracy alone are not sufficiently informative, Table <xref rid="j_info1192_tab_006">6</xref> gives confidence intervals covering the true accuracy values with a confidence probability of 0.95 for all algorithms.</p>
<table-wrap id="j_info1192_tab_006">
<label>Table 6</label>
<caption>
<p>Confidence intervals of classification accuracy.</p>
</caption>
<table>
<thead>
<tr>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Problem</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Algorithm measure</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">1-R</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">RIP-PER</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">CART</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">C4.5</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Random forest</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">Adaboost</td>
<td style="vertical-align: top; text-align: left; border-top: solid thin; border-bottom: solid thin">LAD</td>
</tr>
</thead>
<tbody>
<tr>
<td style="vertical-align: top; text-align: left">SPAM detection</td>
<td style="vertical-align: top; text-align: left">The number of correctly identified observations, %</td>
<td style="vertical-align: top; text-align: left">(79.8; 81.4)</td>
<td style="vertical-align: top; text-align: left">(90.8; 91.8)</td>
<td style="vertical-align: top; text-align: left">(89.8; 90.6)</td>
<td style="vertical-align: top; text-align: left">(89.6; 90.8)</td>
<td style="vertical-align: top; text-align: left">(88.6; 89.6)</td>
<td style="vertical-align: top; text-align: left">(90.8; 91.8)</td>
<td style="vertical-align: top; text-align: left">(92; 92.8)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">Radar scan of the ionosphere</td>
<td style="vertical-align: top; text-align: left">The number of correctly identified observations, %</td>
<td style="vertical-align: top; text-align: left">(78.1; 79.1)</td>
<td style="vertical-align: top; text-align: left">(82.3; 83.3)</td>
<td style="vertical-align: top; text-align: left">(82.3; 83.3)</td>
<td style="vertical-align: top; text-align: left">(79.7; 82.1)</td>
<td style="vertical-align: top; text-align: left">(83.7; 84.7)</td>
<td style="vertical-align: top; text-align: left">(88; 89)</td>
<td style="vertical-align: top; text-align: left">(89.6; 90.4)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">FA</td>
<td style="vertical-align: top; text-align: left">The number of correctly identified observations, %</td>
<td style="vertical-align: top; text-align: left">(57.3; 58.7)</td>
<td style="vertical-align: top; text-align: left">(65.3; 66.7)</td>
<td style="vertical-align: top; text-align: left">(61.3; 62.7)</td>
<td style="vertical-align: top; text-align: left">(69.1; 70.9)</td>
<td style="vertical-align: top; text-align: left">(69.3; 70.7)</td>
<td style="vertical-align: top; text-align: left">(73.3; 74.7)</td>
<td style="vertical-align: top; text-align: left">(75.5; 76.5)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">FV</td>
<td style="vertical-align: top; text-align: left">The number of correctly identified observations, %</td>
<td style="vertical-align: top; text-align: left">(86.6; 88)</td>
<td style="vertical-align: top; text-align: left">(86.1; 87.3)</td>
<td style="vertical-align: top; text-align: left">(62.7; 63.9)</td>
<td style="vertical-align: top; text-align: left">(82.5; 84.1)</td>
<td style="vertical-align: top; text-align: left">(67.7; 68.9)</td>
<td style="vertical-align: top; text-align: left">(88.5; 89.5)</td>
<td style="vertical-align: top; text-align: left">(89.5; 90.5)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">FL</td>
<td style="vertical-align: top; text-align: left">The number of correctly identified observations, %</td>
<td style="vertical-align: top; text-align: left">(85.1; 86.3)</td>
<td style="vertical-align: top; text-align: left">(78.1; 79.1)</td>
<td style="vertical-align: top; text-align: left">(85.1; 86.1)</td>
<td style="vertical-align: top; text-align: left">(84.9; 86.3)</td>
<td style="vertical-align: top; text-align: left">(69.9; 74.9)</td>
<td style="vertical-align: top; text-align: left">(88.8; 89.8)</td>
<td style="vertical-align: top; text-align: left">(96; 96.8)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left">CR</td>
<td style="vertical-align: top; text-align: left">The number of correctly identified observations, %</td>
<td style="vertical-align: top; text-align: left">(68.4; 70)</td>
<td style="vertical-align: top; text-align: left">(68.4; 69)</td>
<td style="vertical-align: top; text-align: left">(71; 72.6)</td>
<td style="vertical-align: top; text-align: left">(76; 77.8)</td>
<td style="vertical-align: top; text-align: left">(66; 67.4)</td>
<td style="vertical-align: top; text-align: left">(69; 70.4)</td>
<td style="vertical-align: top; text-align: left">(79; 80)</td>
</tr>
<tr>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">LO</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">The number of correctly identified observations, %</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">(63.3; 64.7)</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">(73.3; 74.7)</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">(73.3; 74.7)</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">(65.1; 66.9)</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">(75.3; 76.7)</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">(73.3; 74.7)</td>
<td style="vertical-align: top; text-align: left; border-bottom: solid thin">(85.5; 86.5)</td>
</tr>
</tbody>
</table>
</table-wrap>
<p>According to the data provided in Tables <xref rid="j_info1192_tab_005">5</xref>–<xref rid="j_info1192_tab_006">6</xref>, the modified method of logical analysis of data is superior in accuracy to the classification algorithms it has been compared to.</p>
</sec>
<sec id="j_info1192_s_010">
<label>4</label>
<title>Conclusion</title>
<p>An optimization model has been created for building patterns covering significantly different subsets of observations from the training set. This model helps to improve the generalization capability of the classifier built upon these rules. An algorithmic pattern-aggregation procedure has been designed that leads to an increased information content of the rules, effectively helping to improve the accuracy of the decisions made by the classifier. Algorithmic procedures have been developed to reduce the number of patterns in the original classifier while retaining the high accuracy.</p>
<p>The study offers a modified method of logical analysis of data based on the designed algorithmic procedures, which, when applied, helps to increase the interpretability of the classifier and improve its generalization capability. By finding a solution to practical problems, the authors have empirically verified the applicability of optimization models to the task of building informative patterns and the efficiency of the designed algorithmic procedures in relation to the method of logical analysis of data. The accuracy of the modified method of logical analysis of data has been compared against other classification algorithms on practical problems. It turned out that the method has demonstrated better accuracy when solving the proposed problems.</p>
<p>The acquired results advance the studies in the field of logical algorithms of classification and can provide a framework for designing more enhanced decision support systems working on recognition and prediction. The most important advantage of such systems is going to be the ability to interpret the solutions produced by them and substantiate the recommendations they will give. Experience has proved that often the availability of such opportunities is central to a user’s work on recognition and prediction problems.</p>
</sec>
</body>
<back>
<ref-list id="j_info1192_reflist_001">
<title>References</title>
<ref id="j_info1192_ref_001">
<mixed-citation publication-type="journal"><string-name><surname>Alexe</surname>, <given-names>G.</given-names></string-name>, <string-name><surname>Alexe</surname>, <given-names>S.</given-names></string-name>, <string-name><surname>Axelrod</surname>, <given-names>D.</given-names></string-name>, <string-name><surname>Boros</surname>, <given-names>E.</given-names></string-name>, <string-name><surname>Hammer</surname>, <given-names>P.L.</given-names></string-name>, <string-name><surname>Reiss</surname>, <given-names>M.</given-names></string-name> (<year>2002</year>). <article-title>Combinatorial analysis of breast cancer data from image cytometry and gene expression microarrays</article-title>. <source>RUTCOR Technical Report</source>, <volume>3</volume>, <fpage>1</fpage>–<lpage>12</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_002">
<mixed-citation publication-type="journal"><string-name><surname>Antamoshkin</surname>, <given-names>A.N.</given-names></string-name>, <string-name><surname>Masich</surname>, <given-names>I.S.</given-names></string-name> (<year>2006</year>). <article-title>Heuristic search algorithms for monotonic pseudo-Boolean function conditional optimization</article-title>. <source>Problems of Mechanical Engineering and Automation</source>, <volume>5</volume>(<issue>1</issue>), <fpage>55</fpage>–<lpage>61</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_003">
<mixed-citation publication-type="journal"><string-name><surname>Antamoshkin</surname>, <given-names>A.N.</given-names></string-name>, <string-name><surname>Masich</surname>, <given-names>I.S.</given-names></string-name> (<year>2007</year>a). <article-title>Identification of pseudo-Boolean function properties</article-title>. <source>Problems of Mechanical Engineering and Automation</source>, <volume>2</volume>, <fpage>66</fpage>–<lpage>69</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_004">
<mixed-citation publication-type="journal"><string-name><surname>Antamoshkin</surname>, <given-names>A.N.</given-names></string-name>, <string-name><surname>Masich</surname>, <given-names>I.S.</given-names></string-name> (<year>2007</year>b). <article-title>Pseudo-Boolean optimization in case of unconnected feasible sets</article-title>. <source>Models and Algorithms for Global Optimization, Series: Springer Optimization and Its Applications</source>, <volume>4</volume>(<issue>16</issue>), <fpage>111</fpage>–<lpage>122</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_005">
<mixed-citation publication-type="journal"><string-name><surname>Antamoshkin</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Semenkin</surname>, <given-names>E.</given-names></string-name> (<year>1998</year>). <article-title>Local search efficiency when optimizing unimodal pseudoboolean functions</article-title>. <source>Informatica</source>, <volume>9</volume>(<issue>3</issue>), <fpage>279</fpage>–<lpage>296</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_006">
<mixed-citation publication-type="journal"><string-name><surname>Bagirov</surname>, <given-names>A.M.</given-names></string-name> (<year>2011</year>). <article-title>Fast modified global <italic>k</italic>-means algorithm for incremental cluster construction</article-title>. <source>Pattern Recognition</source>, <volume>44</volume>, <fpage>866</fpage>–<lpage>876</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_007">
<mixed-citation publication-type="book"><string-name><surname>Barsegyan</surname>, <given-names>A.A.</given-names></string-name>, <string-name><surname>Kupriyanov</surname>, <given-names>M.S.</given-names></string-name>, <string-name><surname>Stepanenko</surname>, <given-names>V.V.</given-names></string-name>, <string-name><surname>Kholod</surname>, <given-names>I.I.</given-names></string-name> (<year>2004</year>). <source>Method and Models of Data Analysis: OLAP and Data Mining</source>. <comment>BHV-Peterburg, Saint Petersburg (in Russian)</comment>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_008">
<mixed-citation publication-type="journal"><string-name><surname>Bonates</surname>, <given-names>T.</given-names></string-name>, <string-name><surname>Hammer</surname>, <given-names>P.L.</given-names></string-name>, <string-name><surname>Kogan</surname>, <given-names>A.</given-names></string-name> (<year>2006</year>). <article-title>Maximum patterns in datasets</article-title>. <source>RUTCOR Research Report</source>, <volume>9</volume>, <fpage>1</fpage>–<lpage>18</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_009">
<mixed-citation publication-type="journal"><string-name><surname>Boros</surname>, <given-names>E.</given-names></string-name>, <string-name><surname>Hammer</surname>, <given-names>P.L.</given-names></string-name>, <string-name><surname>Kogan</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Crama</surname>, <given-names>Y.</given-names></string-name>, <string-name><surname>Ibaraki</surname>, <given-names>T.</given-names></string-name>, <string-name><surname>Makino</surname>, <given-names>K.</given-names></string-name> (<year>2009</year>). <article-title>Logical analysis of data: classification with justification</article-title>. <source>RUTCOR Technical Report</source>, <volume>5</volume>, <fpage>1</fpage>–<lpage>34</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_010">
<mixed-citation publication-type="journal"><string-name><surname>Brauner</surname>, <given-names>M.W.</given-names></string-name>, <string-name><surname>Brauner</surname>, <given-names>D.</given-names></string-name>, <string-name><surname>Hammer</surname>, <given-names>P.L.</given-names></string-name>, <string-name><surname>Lozina</surname>, <given-names>I.</given-names></string-name>, <string-name><surname>Valeyre</surname>, <given-names>D.</given-names></string-name> (<year>2004</year>). <article-title>Logical analysis of computer tomography data to differentiate entities of idiopathic interstitial pneumonias</article-title>. <source>RUTCOR Research Report</source>, <volume>30</volume>, <fpage>1</fpage>–<lpage>17</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_011">
<mixed-citation publication-type="book"><string-name><surname>Golovenkin</surname>, <given-names>S.E.</given-names></string-name>, <string-name><surname>Gorban</surname>, <given-names>A.N.</given-names></string-name>, <string-name><surname>Schulman</surname>, <given-names>B.A.</given-names></string-name> <etal>et al.</etal> (<year>1997</year>). <source>Complications of Myocardial Infarction: Database for Approbation of Recognition and Forecast Systems</source>. <publisher-name>Computing Center of Siberian Branch of Russian Academy of Sciences</publisher-name>, <publisher-loc>Krasnoyarsk</publisher-loc> <comment>(in Russian)</comment>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_012">
<mixed-citation publication-type="journal"><string-name><surname>Hammer</surname>, <given-names>P.L.</given-names></string-name>, <string-name><surname>Bonates</surname>, <given-names>T.</given-names></string-name> (<year>2005</year>). <article-title>Logical analysis of data: from combinatorial optimization to medical applications</article-title>. <source>RUTCOR Research Report</source>, <volume>10</volume>, <fpage>1</fpage>–<lpage>27</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_013">
<mixed-citation publication-type="journal"><string-name><surname>Hammer</surname>, <given-names>P.L.</given-names></string-name>, <string-name><surname>Kogan</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Lejeune</surname>, <given-names>M.</given-names></string-name> (<year>2004</year>a). <article-title>Modeling country risk ratings using partial orders</article-title>. <source>RUTCOR Research Report</source>, <volume>24</volume>, <fpage>1</fpage>–<lpage>30</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_014">
<mixed-citation publication-type="journal"><string-name><surname>Hammer</surname>, <given-names>P.L.</given-names></string-name>, <string-name><surname>Kogan</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Simeone</surname>, <given-names>B.</given-names></string-name>, <string-name><surname>Szedmak</surname>, <given-names>S.</given-names></string-name> (<year>2004</year>b). <article-title>Pareto-optimal patterns in logical analysis of data</article-title>. <source>Discrete Applied Mathematics</source>, <volume>144</volume>, <fpage>79</fpage>–<lpage>102</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_015">
<mixed-citation publication-type="journal"><string-name><surname>Herrera</surname>, <given-names>J.F.A.</given-names></string-name>, <string-name><surname>Subasi</surname>, <given-names>M.M.</given-names></string-name> (<year>2013</year>). <article-title>Logical analysis of multi-class data</article-title>. <source>RUTCOR Technical Report</source>, <volume>5</volume>, <fpage>1</fpage>–<lpage>24</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_016">
<mixed-citation publication-type="journal"><string-name><surname>Hwang</surname>, <given-names>H.K.</given-names></string-name>, <string-name><surname>Choi</surname>, <given-names>J.Y.</given-names></string-name> (<year>2015</year>). <article-title>Pattern generation for multi-class LAD using iterative genetic algorithm with flexible chromosomes and multiple populations</article-title>. <source>Expert Systems with Applications: An International Journal</source>, <volume>42</volume>(<issue>2</issue>), <fpage>833</fpage>–<lpage>843</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_017">
<mixed-citation publication-type="journal"><string-name><surname>Kotsiantis</surname>, <given-names>S.B.</given-names></string-name> (<year>2007</year>). <article-title>Supervised machine learning: a review of classification techniques</article-title>. <source>Informatica</source>, <volume>31</volume>, <fpage>249</fpage>–<lpage>268</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_018">
<mixed-citation publication-type="journal"><string-name><surname>Kuzmich</surname>, <given-names>R.</given-names></string-name>, <string-name><surname>Masich</surname>, <given-names>I.</given-names></string-name> (<year>2012</year>). <article-title>Building a classification model as a composition of informative patterns</article-title>. <source>Management Systems and Information Technologies</source>, <volume>2</volume>(<issue>48</issue>), <fpage>18</fpage>–<lpage>22</lpage>. <comment>(in Russian)</comment>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_019">
<mixed-citation publication-type="journal"><string-name><surname>Kuzmich</surname>, <given-names>R.</given-names></string-name>, <string-name><surname>Masich</surname>, <given-names>I.</given-names></string-name> (<year>2014</year>). <article-title>Modification to an objective function for building patterns aimed at increasing the distinction between the rules of the classification model</article-title>. <source>Management Systems and Information Technologies</source>, <volume>2</volume>(<issue>56</issue>), <fpage>14</fpage>–<lpage>18</lpage> <comment>(in Russian)</comment>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_020">
<mixed-citation publication-type="journal"><string-name><surname>Provost</surname>, <given-names>F.</given-names></string-name>, <string-name><surname>Hibert</surname>, <given-names>C.</given-names></string-name>, <string-name><surname>Malet</surname>, <given-names>J.-P.</given-names></string-name> (<year>2016</year>). <article-title>Automatic classification of endogenous landslide seismicity using the Random Forest supervised classifier</article-title>. <source>Geophysical Research Abstracts</source>, <volume>18</volume>, <fpage>23</fpage>–<lpage>35</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_021">
<mixed-citation publication-type="journal"><string-name><surname>Rastrigin</surname>, <given-names>L.</given-names></string-name>, <string-name><surname>Freymanis</surname>, <given-names>E.</given-names></string-name> (<year>1988</year>). <article-title>Solving problems of multiple-scale optimization using random-search methods</article-title>. <source>Problems of Random Search</source>, <volume>11</volume>, <fpage>9</fpage>–<lpage>25</lpage> <comment>(in Russian)</comment>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_022">
<mixed-citation publication-type="journal"><string-name><surname>Shi</surname>, <given-names>K.-Q.</given-names></string-name>, <string-name><surname>Zhou</surname>, <given-names>Y.-Y.</given-names></string-name>, <string-name><surname>Yan</surname>, <given-names>H.-D.</given-names></string-name>, <string-name><surname>Li</surname>, <given-names>H.</given-names></string-name>, <string-name><surname>Wu</surname>, <given-names>F.-L.</given-names></string-name>, <string-name><surname>Xie</surname>, <given-names>Y.-Y.</given-names></string-name>, <string-name><surname>Braddock</surname>, <given-names>M.</given-names></string-name>, <string-name><surname>Lin</surname>, <given-names>X.-Y.</given-names></string-name>, <string-name><surname>Zheng</surname>, <given-names>M.-H.</given-names></string-name> (<year>2016</year>). <article-title>Classification and regression tree analysis of acute-on-chronic hepatitis B liver failure: Seeing the forest for the trees</article-title>. <source>Journal of Viral Hepatitis</source>, <volume>24</volume>(<issue>2</issue>), <fpage>132</fpage>–<lpage>140</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_023">
<mixed-citation publication-type="journal"><string-name><surname>Stupina</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Ezhemanskaja</surname>, <given-names>S.</given-names></string-name>, <string-name><surname>Kuzmich</surname>, <given-names>R.</given-names></string-name>, <string-name><surname>Vaingauz</surname>, <given-names>A.</given-names></string-name>, <string-name><surname>Korpacheva</surname>, <given-names>L.</given-names></string-name>, <string-name><surname>Fyodorova</surname>, <given-names>A.</given-names></string-name> (<year>2012</year>). <article-title>Multiple-attribute decision making method based on qualitative information</article-title>. <source>Modern Problems of Science and Education</source>, <volume>5</volume>, <fpage>1</fpage>–<lpage>8</lpage> <comment>(in Russian)</comment>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_024">
<mixed-citation publication-type="journal"><string-name><surname>Sun</surname>, <given-names>B.</given-names></string-name>, <string-name><surname>Chen</surname>, <given-names>S.</given-names></string-name>, <string-name><surname>Wang</surname>, <given-names>J.</given-names></string-name>, <string-name><surname>Chen</surname>, <given-names>H.</given-names></string-name> (<year>2016</year>). <article-title>A robust multi-class AdaBoost algorithm for mislabeled noisy data</article-title>. <source>Knowledge-Based Systems</source>, <volume>102</volume>, <fpage>87</fpage>–<lpage>102</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_025">
<mixed-citation publication-type="journal"><string-name><surname>Vijayarani</surname>, <given-names>S.</given-names></string-name>, <string-name><surname>Divya</surname>, <given-names>M.</given-names></string-name> (<year>2011</year>). <article-title>An efficient algorithm for generating classification rules</article-title>. <source>International Journal of Computer Science and Technology</source>, <volume>2</volume>(<issue>4</issue>), <fpage>512</fpage>–<lpage>515</lpage>.</mixed-citation>
</ref>
<ref id="j_info1192_ref_026">
<mixed-citation publication-type="other"><string-name><surname>Vorontsov</surname>, <given-names>K.</given-names></string-name> (<year>2010</year>). <italic>Lectures on logical algorithms of classification</italic>. Access mode: <ext-link ext-link-type="uri" xlink:href="http://www.machinelearning.ru/wiki/images/3/3e/Voron-ML-Logic.pdf">http://www.machinelearning.ru/wiki/images/3/3e/Voron-ML-Logic.pdf</ext-link> (in Russian).</mixed-citation>
</ref>
<ref id="j_info1192_ref_027">
<mixed-citation publication-type="other"><collab>Weka 3</collab> (<year>2015</year>). <italic>Data Mining with Open Source Machine Learning Software in Java</italic>. Access mode: <ext-link ext-link-type="uri" xlink:href="http://www.cs.waikato.ac.nz/~ml/weka/index.html">http://www.cs.waikato.ac.nz/~ml/weka/index.html</ext-link>.</mixed-citation>
</ref>
</ref-list>
</back>
</article>