<?xml version="1.0" encoding="utf-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:hal="http://hal.archives-ouvertes.fr/" xmlns:gml="http://www.opengis.net/gml/3.3/" xmlns:gmlce="http://www.opengis.net/gml/3.3/ce" version="1.1" xsi:schemaLocation="http://www.tei-c.org/ns/1.0 http://api.archives-ouvertes.fr/documents/aofr-sword.xsd">
  <teiHeader>
    <fileDesc>
      <titleStmt>
        <title>HAL TEI export of lirmm-00671499</title>
      </titleStmt>
      <publicationStmt>
        <distributor>CCSD</distributor>
        <availability status="restricted">
          <licence target="https://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0 - Universal</licence>
        </availability>
        <date when="2026-05-03T09:49:35+02:00"/>
      </publicationStmt>
      <sourceDesc>
        <p part="N">HAL API Platform</p>
      </sourceDesc>
    </fileDesc>
  </teiHeader>
  <text>
    <body>
      <listBibl>
        <biblFull>
          <titleStmt>
            <title xml:lang="en">Classifying Words: A Syllables-based Model</title>
            <author role="aut">
              <persName>
                <forename type="first">Pattaraporn</forename>
                <surname>Warintarawej</surname>
              </persName>
              <email type="md5">9db1406044d1a15d87241698cef887a1</email>
              <email type="domain">lirmm.fr</email>
              <idno type="idhal" notation="numeric">937892</idno>
              <idno type="halauthorid" notation="string">696463-937892</idno>
              <affiliation ref="#struct-181"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Anne</forename>
                <surname>Laurent</surname>
              </persName>
              <email type="md5">30cc2fc4136879eb4d7049fc772e10b8</email>
              <email type="domain">lirmm.fr</email>
              <idno type="idhal" notation="string">anne-laurent</idno>
              <idno type="idhal" notation="numeric">21743</idno>
              <idno type="halauthorid" notation="string">17629-21743</idno>
              <idno type="ORCID">https://orcid.org/0000-0003-3708-6429</idno>
              <idno type="IDREF">https://www.idref.fr/075173735</idno>
              <affiliation ref="#struct-388310"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Pierre</forename>
                <surname>Pompidor</surname>
              </persName>
              <email type="md5">17ff8e635accc43def973e8fe5eed55c</email>
              <email type="domain">lirmm.fr</email>
              <idno type="idhal" notation="string">pierre-pompidor</idno>
              <idno type="idhal" notation="numeric">170558</idno>
              <idno type="halauthorid" notation="string">14532-170558</idno>
              <idno type="ORCID">https://orcid.org/0000-0001-5466-5137</idno>
              <affiliation ref="#struct-388310"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Armelle</forename>
                <surname>Cassanas</surname>
              </persName>
              <email type="md5">417b73ebd0f85ae788ff677754d898d1</email>
              <email type="domain">namaeconcept.com</email>
              <idno type="idhal" notation="numeric">920972</idno>
              <idno type="halauthorid" notation="string">617900-920972</idno>
              <affiliation ref="#struct-151869"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Bénédicte</forename>
                <surname>Laurent</surname>
              </persName>
              <email type="md5">8040be5bc5c85f077df8bfa191950e88</email>
              <email type="domain">namaeconcept.com</email>
              <idno type="idhal" notation="numeric">920973</idno>
              <idno type="halauthorid" notation="string">165872-920973</idno>
              <affiliation ref="#struct-151869"/>
            </author>
            <editor role="depositor">
              <persName>
                <forename>Isabelle</forename>
                <surname>Gouat</surname>
              </persName>
              <email type="md5">01a8910ec35817770bca127295d8d38a</email>
              <email type="domain">lirmm.fr</email>
            </editor>
          </titleStmt>
          <editionStmt>
            <edition n="v1" type="current">
              <date type="whenSubmitted">2012-02-17 15:29:01</date>
              <date type="whenModified">2024-03-23 18:13:16</date>
              <date type="whenReleased">2012-02-17 15:30:42</date>
              <date type="whenProduced">2011-08-29</date>
              <date type="whenEndEmbargoed">2012-02-17</date>
              <ref type="file" target="https://hal-lirmm.ccsd.cnrs.fr/lirmm-00671499v1/document">
                <date notBefore="2012-02-17"/>
              </ref>
              <ref type="file" subtype="author" n="1" target="https://hal-lirmm.ccsd.cnrs.fr/lirmm-00671499v1/file/11_DEXA.PDF" id="file-671499-1107124">
                <date notBefore="2012-02-17"/>
              </ref>
              <ref type="externalLink" target="http://hal.inria.fr/docs/00/67/14/99/PDF/11_DEXA.PDF"/>
            </edition>
            <respStmt>
              <resp>contributor</resp>
              <name key="102079">
                <persName>
                  <forename>Isabelle</forename>
                  <surname>Gouat</surname>
                </persName>
                <email type="md5">01a8910ec35817770bca127295d8d38a</email>
                <email type="domain">lirmm.fr</email>
              </name>
            </respStmt>
          </editionStmt>
          <publicationStmt>
            <distributor>CCSD</distributor>
            <idno type="halId">lirmm-00671499</idno>
            <idno type="halUri">https://hal-lirmm.ccsd.cnrs.fr/lirmm-00671499</idno>
            <idno type="halBibtex">warintarawej:lirmm-00671499</idno>
            <idno type="halRefHtml">&lt;i&gt;DEXA 2011 - 22nd International Conference on Database and Expert Systems Applications&lt;/i&gt;, Aug 2011, Toulouse, France. pp.208-212, &lt;a target="_blank" href="https://dx.doi.org/10.1109/DEXA.2011.21"&gt;&amp;#x27E8;10.1109/DEXA.2011.21&amp;#x27E9;&lt;/a&gt;</idno>
            <idno type="halRef">DEXA 2011 - 22nd International Conference on Database and Expert Systems Applications, Aug 2011, Toulouse, France. pp.208-212, ⟨10.1109/DEXA.2011.21⟩</idno>
            <availability status="restricted">
              <licence target="https://about.hal.science/hal-authorisation-v1/">HAL Authorization<ref corresp="#file-671499-1107124"/></licence>
            </availability>
          </publicationStmt>
          <seriesStmt>
            <idno type="stamp" n="CNRS">CNRS - Centre national de la recherche scientifique</idno>
            <idno type="stamp" n="LIRMM">Laboratoire d'Informatique de Robotique et de Microélectronique de Montpellier</idno>
            <idno type="stamp" n="MIPS">Mathématiques, Informatique, Physique et Systèmes</idno>
            <idno type="stamp" n="UNIV-MONTPELLIER">Université de Montpellier</idno>
            <idno type="stamp" n="UM-2015-2021" corresp="UNIV-MONTPELLIER">Université de Montpellier (2015-2021)</idno>
          </seriesStmt>
          <notesStmt>
            <note type="audience" n="2">International</note>
            <note type="invited" n="0">No</note>
            <note type="popular" n="0">No</note>
            <note type="peer" n="1">Yes</note>
            <note type="proceedings" n="1">Yes</note>
          </notesStmt>
          <sourceDesc>
            <biblStruct>
              <analytic>
                <title xml:lang="en">Classifying Words: A Syllables-based Model</title>
                <author role="aut">
                  <persName>
                    <forename type="first">Pattaraporn</forename>
                    <surname>Warintarawej</surname>
                  </persName>
                  <email type="md5">9db1406044d1a15d87241698cef887a1</email>
                  <email type="domain">lirmm.fr</email>
                  <idno type="idhal" notation="numeric">937892</idno>
                  <idno type="halauthorid" notation="string">696463-937892</idno>
                  <affiliation ref="#struct-181"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Anne</forename>
                    <surname>Laurent</surname>
                  </persName>
                  <email type="md5">30cc2fc4136879eb4d7049fc772e10b8</email>
                  <email type="domain">lirmm.fr</email>
                  <idno type="idhal" notation="string">anne-laurent</idno>
                  <idno type="idhal" notation="numeric">21743</idno>
                  <idno type="halauthorid" notation="string">17629-21743</idno>
                  <idno type="ORCID">https://orcid.org/0000-0003-3708-6429</idno>
                  <idno type="IDREF">https://www.idref.fr/075173735</idno>
                  <affiliation ref="#struct-388310"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Pierre</forename>
                    <surname>Pompidor</surname>
                  </persName>
                  <email type="md5">17ff8e635accc43def973e8fe5eed55c</email>
                  <email type="domain">lirmm.fr</email>
                  <idno type="idhal" notation="string">pierre-pompidor</idno>
                  <idno type="idhal" notation="numeric">170558</idno>
                  <idno type="halauthorid" notation="string">14532-170558</idno>
                  <idno type="ORCID">https://orcid.org/0000-0001-5466-5137</idno>
                  <affiliation ref="#struct-388310"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Armelle</forename>
                    <surname>Cassanas</surname>
                  </persName>
                  <email type="md5">417b73ebd0f85ae788ff677754d898d1</email>
                  <email type="domain">namaeconcept.com</email>
                  <idno type="idhal" notation="numeric">920972</idno>
                  <idno type="halauthorid" notation="string">617900-920972</idno>
                  <affiliation ref="#struct-151869"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Bénédicte</forename>
                    <surname>Laurent</surname>
                  </persName>
                  <email type="md5">8040be5bc5c85f077df8bfa191950e88</email>
                  <email type="domain">namaeconcept.com</email>
                  <idno type="idhal" notation="numeric">920973</idno>
                  <idno type="halauthorid" notation="string">165872-920973</idno>
                  <affiliation ref="#struct-151869"/>
                </author>
              </analytic>
              <monogr>
                <idno type="isbn">978-0-7695-4486-1</idno>
                <meeting>
                  <title>DEXA 2011 - 22nd International Conference on Database and Expert Systems Applications</title>
                  <date type="start">2011-08-29</date>
                  <date type="end">2011-09-02</date>
                  <settlement>Toulouse</settlement>
                  <country key="FR">France</country>
                </meeting>
                <imprint>
                  <biblScope unit="pp">208-212</biblScope>
                  <date type="datePub">2011-09</date>
                </imprint>
              </monogr>
              <idno type="doi">10.1109/DEXA.2011.21</idno>
            </biblStruct>
          </sourceDesc>
          <profileDesc>
            <langUsage>
              <language ident="en">English</language>
            </langUsage>
            <textClass>
              <keywords scheme="author">
                <term xml:lang="en">Classification</term>
                <term xml:lang="en">Words Classification</term>
                <term xml:lang="en">Feature Selection</term>
                <term xml:lang="en">Syllables</term>
                <term xml:lang="en">Discriminative Features</term>
              </keywords>
              <classCode scheme="halDomain" n="info.info-db">Computer Science [cs]/Databases [cs.DB]</classCode>
              <classCode scheme="halTypology" n="COMM">Conference papers</classCode>
              <classCode scheme="halOldTypology" n="COMM">Conference papers</classCode>
              <classCode scheme="halTreeTypology" n="COMM">Conference papers</classCode>
            </textClass>
            <abstract xml:lang="en">
              <p>Text classification has been extensively studied by linguists and computer scientists. However, there are very few works on classification of words into classes or concepts (e.g. thesaurus). In this paper, we consider this topic, especially in the context of the classification of names like brand names or neologisms. The challenge is thus to provide automated tools to analyze new names by classifying them into concepts. Then, for example, a naming company customer can be informed about which concept a new name is closest to. As we argue that a word can belong to several concepts, we propose to consider the top-k classification approach. Moreover, we rely on syllables to build the classification model. The word corpus is collected from French thesaurus. All labeled-words are separated into syllables. Feature selection techniques are used to select discriminative syllables. We use a syllables frequency (SF) and mutual information (MI) performing with Naive Bayes classifier and K-nearest neighbor (KNN). Instead of selecting only one class, the model select top-k classes ranking them by a classifier score. The result shows the top-k classification model helps to analyze a new word by showing that it can be related to more than one concept. Moreover, the set of discriminative syllables can be used to explain the classification results which makes the results more meaningful.</p>
            </abstract>
          </profileDesc>
        </biblFull>
      </listBibl>
    </body>
    <back>
      <listOrg type="structures">
        <org type="laboratory" xml:id="struct-181" status="OLD">
          <idno type="IdRef">139590827</idno>
          <idno type="ISNI">0000000405990488</idno>
          <idno type="RNSR">199111950H</idno>
          <idno type="ROR">https://ror.org/013yean28</idno>
          <orgName>Laboratoire d'Informatique de Robotique et de Microélectronique de Montpellier</orgName>
          <orgName type="acronym">LIRMM</orgName>
          <date type="start">1995-01-01</date>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>161 rue Ada - 34095 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.lirmm.fr</ref>
          </desc>
          <listRelation>
            <relation name="UMR5506" active="#struct-410122" type="direct"/>
            <relation name="UMR5506" active="#struct-441569" type="direct"/>
          </listRelation>
        </org>
        <org type="researchteam" xml:id="struct-388310" status="OLD">
          <orgName>Fouille de données environnementales</orgName>
          <orgName type="acronym">TATOO</orgName>
          <date type="end">2013</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
          </desc>
          <listRelation>
            <relation active="#struct-181" type="direct"/>
            <relation name="UMR5506" active="#struct-410122" type="indirect"/>
            <relation name="UMR5506" active="#struct-441569" type="indirect"/>
          </listRelation>
        </org>
        <org type="laboratory" xml:id="struct-151869" status="VALID">
          <orgName>Namae Concept</orgName>
          <desc>
            <address>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.namaeconcept.com</ref>
          </desc>
          <listRelation>
            <relation active="#struct-338165" type="direct"/>
            <relation active="#struct-338166" type="direct"/>
          </listRelation>
        </org>
        <org type="institution" xml:id="struct-410122" status="OLD">
          <idno type="ISNI">0000000120970141</idno>
          <idno type="ROR">https://ror.org/051escj72</idno>
          <orgName>Université de Montpellier</orgName>
          <orgName type="acronym">UM</orgName>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>163 rue Auguste Broussonnet - 34090 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.umontpellier.fr/</ref>
          </desc>
        </org>
        <org type="regroupinstitution" xml:id="struct-441569" status="VALID">
          <idno type="IdRef">02636817X</idno>
          <idno type="ISNI">0000000122597504</idno>
          <idno type="ROR">https://ror.org/02feahw73</idno>
          <orgName>Centre National de la Recherche Scientifique</orgName>
          <orgName type="acronym">CNRS</orgName>
          <date type="start">1939-10-19</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.cnrs.fr/</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-338165" status="INCOMING">
          <orgName>namaeconcept</orgName>
          <desc>
            <address>
              <country key="FR"/>
            </address>
          </desc>
        </org>
        <org type="institution" xml:id="struct-338166" status="INCOMING">
          <orgName>name concept</orgName>
          <desc>
            <address>
              <country key="FR"/>
            </address>
          </desc>
        </org>
      </listOrg>
    </back>
  </text>
</TEI>