<?xml version="1.0" encoding="utf-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:hal="http://hal.archives-ouvertes.fr/" xmlns:gml="http://www.opengis.net/gml/3.3/" xmlns:gmlce="http://www.opengis.net/gml/3.3/ce" version="1.1" xsi:schemaLocation="http://www.tei-c.org/ns/1.0 http://api.archives-ouvertes.fr/documents/aofr-sword.xsd">
  <teiHeader>
    <fileDesc>
      <titleStmt>
        <title>HAL TEI export of lirmm-00834051</title>
      </titleStmt>
      <publicationStmt>
        <distributor>CCSD</distributor>
        <availability status="restricted">
          <licence target="https://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0 - Universal</licence>
        </availability>
        <date when="2026-05-18T22:20:05+02:00"/>
      </publicationStmt>
      <sourceDesc>
        <p part="N">HAL API Platform</p>
      </sourceDesc>
    </fileDesc>
  </teiHeader>
  <text>
    <body>
      <listBibl>
        <biblFull>
          <titleStmt>
            <title xml:lang="en">Software understanding: Automatic classification of software identifiers</title>
            <author role="aut">
              <persName>
                <forename type="first">Pattaraporn</forename>
                <surname>Warintarawej</surname>
              </persName>
              <email type="md5">9db1406044d1a15d87241698cef887a1</email>
              <email type="domain">lirmm.fr</email>
              <idno type="idhal" notation="numeric">937892</idno>
              <idno type="halauthorid" notation="string">696463-937892</idno>
              <affiliation ref="#struct-409262"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Anne</forename>
                <surname>Laurent</surname>
              </persName>
              <email type="md5">30cc2fc4136879eb4d7049fc772e10b8</email>
              <email type="domain">lirmm.fr</email>
              <idno type="idhal" notation="string">anne-laurent</idno>
              <idno type="idhal" notation="numeric">21743</idno>
              <idno type="halauthorid" notation="string">17629-21743</idno>
              <idno type="ORCID">https://orcid.org/0000-0003-3708-6429</idno>
              <idno type="IDREF">https://www.idref.fr/075173735</idno>
              <affiliation ref="#struct-475163"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Marianne</forename>
                <surname>Huchard</surname>
              </persName>
              <email type="md5">7e694b40b623a293a6fa894db63be2e7</email>
              <email type="domain">lirmm.fr</email>
              <ptr type="url" target="https://marianne-huchard.fr/"/>
              <idno type="idhal" notation="string">marianne-huchard</idno>
              <idno type="idhal" notation="numeric">8651</idno>
              <idno type="halauthorid" notation="string">24066-8651</idno>
              <idno type="ORCID">https://orcid.org/0000-0002-6309-7503</idno>
              <idno type="IDREF">https://www.idref.fr/060595175</idno>
              <affiliation ref="#struct-388202"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Mathieu</forename>
                <surname>Lafourcade</surname>
              </persName>
              <email type="md5">98119012c50211e3f4fd9facbd61934f</email>
              <email type="domain">lirmm.fr</email>
              <idno type="idhal" notation="string">mathieu-lafourcade</idno>
              <idno type="idhal" notation="numeric">172381</idno>
              <idno type="halauthorid" notation="string">17916-172381</idno>
              <idno type="ORCID">https://orcid.org/0000-0003-2832-2143</idno>
              <affiliation ref="#struct-392245"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Pierre</forename>
                <surname>Pompidor</surname>
              </persName>
              <email type="md5">17ff8e635accc43def973e8fe5eed55c</email>
              <email type="domain">lirmm.fr</email>
              <idno type="idhal" notation="string">pierre-pompidor</idno>
              <idno type="idhal" notation="numeric">170558</idno>
              <idno type="halauthorid" notation="string">14532-170558</idno>
              <idno type="ORCID">https://orcid.org/0000-0001-5466-5137</idno>
              <affiliation ref="#struct-409262"/>
            </author>
            <editor role="depositor">
              <persName>
                <forename>Pierre</forename>
                <surname>Pompidor</surname>
              </persName>
              <email type="md5">17ff8e635accc43def973e8fe5eed55c</email>
              <email type="domain">lirmm.fr</email>
            </editor>
          </titleStmt>
          <editionStmt>
            <edition n="v1" type="current">
              <date type="whenSubmitted">2013-06-14 09:19:15</date>
              <date type="whenWritten">2013</date>
              <date type="whenModified">2026-02-11 10:47:57</date>
              <date type="whenReleased">2013-06-19 17:01:45</date>
              <date type="whenProduced">2015</date>
              <date type="whenEndEmbargoed">2013-06-14</date>
              <ref type="file" target="https://hal-lirmm.ccsd.cnrs.fr/lirmm-00834051v1/document">
                <date notBefore="2013-06-14"/>
              </ref>
              <ref type="file" subtype="author" n="1" target="https://hal-lirmm.ccsd.cnrs.fr/lirmm-00834051v1/file/Software_Understanding.pdf" id="file-834051-1096419">
                <date notBefore="2013-06-14"/>
              </ref>
            </edition>
            <respStmt>
              <resp>contributor</resp>
              <name key="127693">
                <persName>
                  <forename>Pierre</forename>
                  <surname>Pompidor</surname>
                </persName>
                <email type="md5">17ff8e635accc43def973e8fe5eed55c</email>
                <email type="domain">lirmm.fr</email>
              </name>
            </respStmt>
          </editionStmt>
          <publicationStmt>
            <distributor>CCSD</distributor>
            <idno type="halId">lirmm-00834051</idno>
            <idno type="halUri">https://hal-lirmm.ccsd.cnrs.fr/lirmm-00834051</idno>
            <idno type="halBibtex">warintarawej:lirmm-00834051</idno>
            <idno type="halRefHtml">&lt;i&gt;Intelligent Data Analysis&lt;/i&gt;, 2015, 19 (4), pp.761-778. &lt;a target="_blank" href="https://dx.doi.org/10.3233/IDA-150744"&gt;&amp;#x27E8;10.3233/IDA-150744&amp;#x27E9;&lt;/a&gt;</idno>
            <idno type="halRef">Intelligent Data Analysis, 2015, 19 (4), pp.761-778. &amp;#x27E8;10.3233/IDA-150744&amp;#x27E9;</idno>
            <availability status="restricted">
              <licence target="https://about.hal.science/hal-authorisation-v1/">HAL Authorization<ref corresp="#file-834051-1096419"/></licence>
            </availability>
          </publicationStmt>
          <seriesStmt>
            <idno type="stamp" n="CNRS">CNRS - Centre national de la recherche scientifique</idno>
            <idno type="stamp" n="ADVANSE" corresp="LIRMM">ADVanced Analytics for data SciencE</idno>
            <idno type="stamp" n="TEXTE" corresp="LIRMM">Exploration et Exploitation de Données Textuelles</idno>
            <idno type="stamp" n="MAREL" corresp="LIRMM">Models And Reuse Engineering, Languages</idno>
            <idno type="stamp" n="LIRMM">Laboratoire d'Informatique de Robotique et de Microélectronique de Montpellier</idno>
            <idno type="stamp" n="MIPS">Mathématiques, Informatique, Physique et Systèmes</idno>
            <idno type="stamp" n="UNIV-MONTPELLIER">Université de Montpellier</idno>
            <idno type="stamp" n="FADO" corresp="LIRMM">Fuzziness, Alignments, Data &amp; Ontologies</idno>
            <idno type="stamp" n="UM-2015-2021" corresp="UNIV-MONTPELLIER">Université de Montpellier (2015-2021)</idno>
            <idno type="stamp" n="WEB3">WEB3</idno>
            <idno type="stamp" n="WEB-CUBE" corresp="LIRMM">WEB-CUBE</idno>
          </seriesStmt>
          <notesStmt>
            <note type="audience" n="2">International</note>
            <note type="popular" n="0">No</note>
            <note type="peer" n="1">Yes</note>
          </notesStmt>
          <sourceDesc>
            <biblStruct>
              <analytic>
                <title xml:lang="en">Software understanding: Automatic classification of software identifiers</title>
                <author role="aut">
                  <persName>
                    <forename type="first">Pattaraporn</forename>
                    <surname>Warintarawej</surname>
                  </persName>
                  <email type="md5">9db1406044d1a15d87241698cef887a1</email>
                  <email type="domain">lirmm.fr</email>
                  <idno type="idhal" notation="numeric">937892</idno>
                  <idno type="halauthorid" notation="string">696463-937892</idno>
                  <affiliation ref="#struct-409262"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Anne</forename>
                    <surname>Laurent</surname>
                  </persName>
                  <email type="md5">30cc2fc4136879eb4d7049fc772e10b8</email>
                  <email type="domain">lirmm.fr</email>
                  <idno type="idhal" notation="string">anne-laurent</idno>
                  <idno type="idhal" notation="numeric">21743</idno>
                  <idno type="halauthorid" notation="string">17629-21743</idno>
                  <idno type="ORCID">https://orcid.org/0000-0003-3708-6429</idno>
                  <idno type="IDREF">https://www.idref.fr/075173735</idno>
                  <affiliation ref="#struct-475163"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Marianne</forename>
                    <surname>Huchard</surname>
                  </persName>
                  <email type="md5">7e694b40b623a293a6fa894db63be2e7</email>
                  <email type="domain">lirmm.fr</email>
                  <ptr type="url" target="https://marianne-huchard.fr/"/>
                  <idno type="idhal" notation="string">marianne-huchard</idno>
                  <idno type="idhal" notation="numeric">8651</idno>
                  <idno type="halauthorid" notation="string">24066-8651</idno>
                  <idno type="ORCID">https://orcid.org/0000-0002-6309-7503</idno>
                  <idno type="IDREF">https://www.idref.fr/060595175</idno>
                  <affiliation ref="#struct-388202"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Mathieu</forename>
                    <surname>Lafourcade</surname>
                  </persName>
                  <email type="md5">98119012c50211e3f4fd9facbd61934f</email>
                  <email type="domain">lirmm.fr</email>
                  <idno type="idhal" notation="string">mathieu-lafourcade</idno>
                  <idno type="idhal" notation="numeric">172381</idno>
                  <idno type="halauthorid" notation="string">17916-172381</idno>
                  <idno type="ORCID">https://orcid.org/0000-0003-2832-2143</idno>
                  <affiliation ref="#struct-392245"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Pierre</forename>
                    <surname>Pompidor</surname>
                  </persName>
                  <email type="md5">17ff8e635accc43def973e8fe5eed55c</email>
                  <email type="domain">lirmm.fr</email>
                  <idno type="idhal" notation="string">pierre-pompidor</idno>
                  <idno type="idhal" notation="numeric">170558</idno>
                  <idno type="halauthorid" notation="string">14532-170558</idno>
                  <idno type="ORCID">https://orcid.org/0000-0001-5466-5137</idno>
                  <affiliation ref="#struct-409262"/>
                </author>
              </analytic>
              <monogr>
                <idno type="halJournalId" status="VALID">14249</idno>
                <idno type="issn">1088-467X</idno>
                <title level="j">Intelligent Data Analysis</title>
                <imprint>
                  <publisher>IOS Press</publisher>
                  <biblScope unit="volume">19</biblScope>
                  <biblScope unit="issue">4</biblScope>
                  <biblScope unit="pp">761-778</biblScope>
                  <date type="datePub">2015</date>
                </imprint>
              </monogr>
              <idno type="doi">10.3233/IDA-150744</idno>
            </biblStruct>
          </sourceDesc>
          <profileDesc>
            <langUsage>
              <language ident="en">English</language>
            </langUsage>
            <textClass>
              <keywords scheme="author">
                <term xml:lang="en">Text classification</term>
                <term xml:lang="en">Software Engineering</term>
                <term xml:lang="en">Data Mining</term>
                <term xml:lang="en">Automatic Software Understanding</term>
              </keywords>
              <classCode scheme="halDomain" n="info.info-ir">Computer Science [cs]/Information Retrieval [cs.IR]</classCode>
              <classCode scheme="halTypology" n="ART">Journal articles</classCode>
              <classCode scheme="halOldTypology" n="ART">Journal articles</classCode>
              <classCode scheme="halTreeTypology" n="ART">Journal articles</classCode>
            </textClass>
            <abstract xml:lang="en">
              <p>Identifier names (e.g., packages, classes, methods, variables) are one of the most important sources for software comprehension. Identifier names need to be analyzed in order to support collaborative software engineering and to reuse source code. Indeed, they convey the domain concepts of software. For instance, "getMinimumSupport" would be associated with the association rule concept in data mining software, while some are difficult to recognize, such as those that mix parts of words (e.g., "initFeatSet"). We thus propose methods for assisting automatic software understanding by classifying identifier names into domain concept categories. An innovative solution based on data mining algorithms is proposed. Our approach aims to learn character patterns of identifier names. The main challenges are (1) to automatically split identifier names into relevant constituent subnames, and (2) to build a model associating such a set of subnames with predefined domain concepts. For this purpose, we propose a novel manner for splitting such identifiers into their constituent words and use N-gram-based text classification to predict the related domain concept. In this article, we report the theoretical method and the algorithms we propose, together with the experiments run on real software source code that show the interest of our approach.</p>
            </abstract>
          </profileDesc>
        </biblFull>
      </listBibl>
    </body>
    <back>
      <listOrg type="structures">
        <org type="researchteam" xml:id="struct-409262" status="OLD">
          <orgName>ADVanced Analytics for data SciencE</orgName>
          <orgName type="acronym">ADVANSE</orgName>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>LIRMM, 161 rue Ada, 34000 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.lirmm.fr/equipes/ADVANSE/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-181" type="direct"/>
            <relation name="UMR5506" active="#struct-410122" type="indirect"/>
            <relation name="UMR5506" active="#struct-441569" type="indirect"/>
          </listRelation>
        </org>
        <org type="researchteam" xml:id="struct-475163" status="OLD">
          <orgName>WEB Architecture x Semantic WEB x WEB of Data</orgName>
          <orgName type="acronym">WEB3</orgName>
          <date type="start">2017-01-01</date>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>LIRMM, 161 rue Ada, 34000 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.lirmm.fr/equipes/WEB3/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-181" type="direct"/>
            <relation name="UMR5506" active="#struct-410122" type="indirect"/>
            <relation name="UMR5506" active="#struct-441569" type="indirect"/>
          </listRelation>
        </org>
        <org type="researchteam" xml:id="struct-388202" status="OLD">
          <orgName>Models And Reuse Engineering, Languages</orgName>
          <orgName type="acronym">MAREL</orgName>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>LIRMM, 161 rue Ada, 34000 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.lirmm.fr/equipes/MAREL/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-181" type="direct"/>
            <relation name="UMR5506" active="#struct-410122" type="indirect"/>
            <relation name="UMR5506" active="#struct-441569" type="indirect"/>
          </listRelation>
        </org>
        <org type="researchteam" xml:id="struct-392245" status="OLD">
          <orgName>Exploration et exploitation de données textuelles</orgName>
          <orgName type="acronym">TEXTE</orgName>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>LIRMM, 161 rue Ada, 34000 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.lirmm.fr/equipes/TEXTE/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-181" type="direct"/>
            <relation name="UMR5506" active="#struct-410122" type="indirect"/>
            <relation name="UMR5506" active="#struct-441569" type="indirect"/>
          </listRelation>
        </org>
        <org type="laboratory" xml:id="struct-181" status="OLD">
          <idno type="IdRef">139590827</idno>
          <idno type="ISNI">0000000405990488</idno>
          <idno type="RNSR">199111950H</idno>
          <idno type="ROR">https://ror.org/013yean28</idno>
          <orgName>Laboratoire d'Informatique de Robotique et de Microélectronique de Montpellier</orgName>
          <orgName type="acronym">LIRMM</orgName>
          <date type="start">1995-01-01</date>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>161 rue Ada - 34095 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.lirmm.fr</ref>
          </desc>
          <listRelation>
            <relation name="UMR5506" active="#struct-410122" type="direct"/>
            <relation name="UMR5506" active="#struct-441569" type="direct"/>
          </listRelation>
        </org>
        <org type="institution" xml:id="struct-410122" status="OLD">
          <idno type="ISNI">0000000120970141</idno>
          <idno type="ROR">https://ror.org/051escj72</idno>
          <orgName>Université de Montpellier</orgName>
          <orgName type="acronym">UM</orgName>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>163 rue Auguste Broussonnet - 34090 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.umontpellier.fr/</ref>
          </desc>
        </org>
        <org type="regroupinstitution" xml:id="struct-441569" status="VALID">
          <idno type="IdRef">02636817X</idno>
          <idno type="ISNI">0000000122597504</idno>
          <idno type="ROR">https://ror.org/02feahw73</idno>
          <orgName>Centre National de la Recherche Scientifique</orgName>
          <orgName type="acronym">CNRS</orgName>
          <date type="start">1939-10-19</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.cnrs.fr/</ref>
          </desc>
        </org>
      </listOrg>
    </back>
  </text>
</TEI>