<?xml version="1.0" encoding="utf-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:hal="http://hal.archives-ouvertes.fr/" xmlns:gml="http://www.opengis.net/gml/3.3/" xmlns:gmlce="http://www.opengis.net/gml/3.3/ce" version="1.1" xsi:schemaLocation="http://www.tei-c.org/ns/1.0 http://api.archives-ouvertes.fr/documents/aofr-sword.xsd">
  <teiHeader>
    <fileDesc>
      <titleStmt>
        <title>HAL TEI export of lirmm-01054903</title>
      </titleStmt>
      <publicationStmt>
        <distributor>CCSD</distributor>
        <availability status="restricted">
          <licence target="https://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0 - Universal</licence>
        </availability>
        <date when="2026-05-03T21:06:51+02:00"/>
      </publicationStmt>
      <sourceDesc>
        <p part="N">HAL API Platform</p>
      </sourceDesc>
    </fileDesc>
  </teiHeader>
  <text>
    <body>
      <listBibl>
        <biblFull>
          <titleStmt>
            <title xml:lang="fr">De nouvelles pondérations adaptées à la classification de petits volumes de données textuelles</title>
            <author role="aut">
              <persName>
                <forename type="first">Flavien</forename>
                <surname>Bouillot</surname>
              </persName>
              <email type="md5">a797c51037fb2b23b677d25a59560455</email>
              <email type="domain">lirmm.fr</email>
              <idno type="idhal" notation="numeric">927300</idno>
              <idno type="halauthorid" notation="string">646674-927300</idno>
              <affiliation ref="#struct-409262"/>
              <affiliation ref="#struct-23810"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Pascal</forename>
                <surname>Poncelet</surname>
              </persName>
              <email type="md5">a01013fff28244b30818d056a90083f1</email>
              <email type="domain">ema.fr</email>
              <idno type="idhal" notation="string">pascal-poncelet</idno>
              <idno type="idhal" notation="numeric">6247</idno>
              <idno type="halauthorid" notation="string">23856-6247</idno>
              <idno type="ORCID">https://orcid.org/0000-0002-8277-3490</idno>
              <idno type="IDREF">https://www.idref.fr/069260613</idno>
              <affiliation ref="#struct-409262"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Mathieu</forename>
                <surname>Roche</surname>
              </persName>
              <email type="md5">f3369d939820713d626eff81740a4eeb</email>
              <email type="domain">cirad.fr</email>
              <ptr type="url" target="http://agents.cirad.fr/index.php/Mathieu+ROCHE"/>
              <idno type="idhal" notation="string">mathieu-roche</idno>
              <idno type="idhal" notation="numeric">4967</idno>
              <idno type="halauthorid" notation="string">20190-4967</idno>
              <idno type="IDREF">https://www.idref.fr/09042087X</idno>
              <idno type="ORCID">https://orcid.org/0000-0003-3272-8568</idno>
              <affiliation ref="#struct-568971"/>
              <affiliation ref="#struct-409262"/>
            </author>
            <editor role="depositor">
              <persName>
                <forename>Mathieu</forename>
                <surname>Roche</surname>
              </persName>
              <email type="md5">db65d8027137d174ecf559fb797288b9</email>
              <email type="domain">cirad.fr</email>
            </editor>
          </titleStmt>
          <editionStmt>
            <edition n="v1" type="current">
              <date type="whenSubmitted">2018-11-06 11:50:38</date>
              <date type="whenModified">2025-03-21 15:14:31</date>
              <date type="whenReleased">2018-11-06 13:19:00</date>
              <date type="whenProduced">2014-01-28</date>
              <date type="whenEndEmbargoed">2018-11-06</date>
              <ref type="file" target="https://hal-lirmm.ccsd.cnrs.fr/lirmm-01054903v1/document">
                <date notBefore="2018-11-06"/>
              </ref>
              <ref type="file" subtype="author" n="1" target="https://hal-lirmm.ccsd.cnrs.fr/lirmm-01054903v1/file/lirmm-01054903.pdf" id="file-1913411-1944759">
                <date notBefore="2018-11-06"/>
              </ref>
            </edition>
            <respStmt>
              <resp>contributor</resp>
              <name key="114352">
                <persName>
                  <forename>Mathieu</forename>
                  <surname>Roche</surname>
                </persName>
                <email type="md5">db65d8027137d174ecf559fb797288b9</email>
                <email type="domain">cirad.fr</email>
              </name>
            </respStmt>
          </editionStmt>
          <publicationStmt>
            <distributor>CCSD</distributor>
            <idno type="halId">lirmm-01054903</idno>
            <idno type="halUri">https://hal-lirmm.ccsd.cnrs.fr/lirmm-01054903</idno>
            <idno type="halBibtex">bouillot:lirmm-01054903</idno>
            <idno type="halRefHtml">&lt;i&gt;EGC: Extraction et Gestion des Connaissances&lt;/i&gt;, Jan 2014, Rennes, France. pp.131-142</idno>
            <idno type="halRef">EGC: Extraction et Gestion des Connaissances, Jan 2014, Rennes, France. pp.131-142</idno>
            <availability status="restricted">
              <licence target="https://about.hal.science/hal-authorisation-v1/">HAL Authorization<ref corresp="#file-1913411-1944759"/></licence>
            </availability>
          </publicationStmt>
          <seriesStmt>
            <idno type="stamp" n="CIRAD">CIRAD - Centre de coopération internationale en recherche agronomique pour le développement</idno>
            <idno type="stamp" n="AGROPARISTECH">AgroParisTech</idno>
            <idno type="stamp" n="CNRS">CNRS - Centre national de la recherche scientifique</idno>
            <idno type="stamp" n="IRSTEA">IRSTEA - Institut national de recherche en sciences et technologies pour l'environnement et l'agriculture (&lt;b&gt;anciennement Cemagref&lt;/b&gt;)</idno>
            <idno type="stamp" n="ADVANSE" corresp="LIRMM">ADVanced Analytics for data SciencE</idno>
            <idno type="stamp" n="LIRMM">Laboratoire d'Informatique de Robotique et de Microélectronique de Montpellier</idno>
            <idno type="stamp" n="AGROPOLIS">Agropolis</idno>
            <idno type="stamp" n="TETIS">TETIS</idno>
            <idno type="stamp" n="AGREENIUM">Archive ouverte en agrobiosciences</idno>
            <idno type="stamp" n="MIPS">Mathématiques, Informatique, Physique et Systèmes</idno>
            <idno type="stamp" n="UNIV-MONTPELLIER">Université de Montpellier</idno>
            <idno type="stamp" n="INRAE">Institut National de Recherche en Agriculture, Alimentation et Environnement</idno>
            <idno type="stamp" n="INRAEOCCITANIEMONTPELLIER" corresp="INRAE">INRAE Occitanie Montpellier</idno>
            <idno type="stamp" n="UM-2015-2021" corresp="UNIV-MONTPELLIER">Université de Montpellier (2015-2021)</idno>
            <idno type="stamp" n="MATHNUM">Département MathNum</idno>
          </seriesStmt>
          <notesStmt>
            <note type="audience" n="2">International</note>
            <note type="invited" n="0">No</note>
            <note type="popular" n="0">No</note>
            <note type="peer" n="1">Yes</note>
            <note type="proceedings" n="1">Yes</note>
          </notesStmt>
          <sourceDesc>
            <biblStruct>
              <analytic>
                <title xml:lang="fr">De nouvelles pondérations adaptées à la classification de petits volumes de données textuelles</title>
                <author role="aut">
                  <persName>
                    <forename type="first">Flavien</forename>
                    <surname>Bouillot</surname>
                  </persName>
                  <email type="md5">a797c51037fb2b23b677d25a59560455</email>
                  <email type="domain">lirmm.fr</email>
                  <idno type="idhal" notation="numeric">927300</idno>
                  <idno type="halauthorid" notation="string">646674-927300</idno>
                  <affiliation ref="#struct-409262"/>
                  <affiliation ref="#struct-23810"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Pascal</forename>
                    <surname>Poncelet</surname>
                  </persName>
                  <email type="md5">a01013fff28244b30818d056a90083f1</email>
                  <email type="domain">ema.fr</email>
                  <idno type="idhal" notation="string">pascal-poncelet</idno>
                  <idno type="idhal" notation="numeric">6247</idno>
                  <idno type="halauthorid" notation="string">23856-6247</idno>
                  <idno type="ORCID">https://orcid.org/0000-0002-8277-3490</idno>
                  <idno type="IDREF">https://www.idref.fr/069260613</idno>
                  <affiliation ref="#struct-409262"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Mathieu</forename>
                    <surname>Roche</surname>
                  </persName>
                  <email type="md5">f3369d939820713d626eff81740a4eeb</email>
                  <email type="domain">cirad.fr</email>
                  <ptr type="url" target="http://agents.cirad.fr/index.php/Mathieu+ROCHE"/>
                  <idno type="idhal" notation="string">mathieu-roche</idno>
                  <idno type="idhal" notation="numeric">4967</idno>
                  <idno type="halauthorid" notation="string">20190-4967</idno>
                  <idno type="IDREF">https://www.idref.fr/09042087X</idno>
                  <idno type="ORCID">https://orcid.org/0000-0003-3272-8568</idno>
                  <affiliation ref="#struct-568971"/>
                  <affiliation ref="#struct-409262"/>
                </author>
              </analytic>
              <monogr>
                <title level="m">14ème Conférence Internationale Francophone sur l’Extraction et Gestion des Connaissances</title>
                <meeting>
                  <title>EGC: Extraction et Gestion des Connaissances</title>
                  <date type="start">2014-01-28</date>
                  <date type="end">2014-01-31</date>
                  <settlement>Rennes</settlement>
                  <country key="FR">France</country>
                </meeting>
                <imprint>
                  <biblScope unit="volume">RNTI-E-26</biblScope>
                  <biblScope unit="pp">131-142</biblScope>
                  <date type="datePub">2014</date>
                </imprint>
              </monogr>
              <ref type="publisher">http://egc2014.irisa.fr</ref>
            </biblStruct>
          </sourceDesc>
          <profileDesc>
            <langUsage>
              <language ident="fr">French</language>
            </langUsage>
            <textClass>
              <classCode scheme="halDomain" n="spi.other">Engineering Sciences [physics]/Other</classCode>
              <classCode scheme="halDomain" n="info.info-tt">Computer Science [cs]/Document and Text Processing</classCode>
              <classCode scheme="halDomain" n="info.info-ir">Computer Science [cs]/Information Retrieval [cs.IR]</classCode>
              <classCode scheme="halTypology" n="COMM">Conference papers</classCode>
              <classCode scheme="halOldTypology" n="COMM">Conference papers</classCode>
              <classCode scheme="halTreeTypology" n="COMM">Conference papers</classCode>
            </textClass>
            <abstract xml:lang="fr">
              <p>Un des défis actuels dans le domaine de la classification supervisée de documents est de pouvoir produire un modèle fiable à partir d'un faible volume de données. Avec un volume conséquent de données, les classifieurs fournissent des résultats satisfaisants mais les performances sont dégradées lorsque celui-ci diminue. Nous proposons, dans cet article, de nouvelles méthodes de pondérations résistant à une diminution du volume de données. Leur efficacité, évaluée en utilisant des algorithmes de classification supervisés existants (Naive Bayes et Class-Feature-Centroid) sur deux corpus différents, est supérieure à celle des autres algorithmes lorsque le nombre de descripteurs diminue. Nous avons étudié en parallèle les paramètres influençant les différentes approches telles que le nombre de classes, de documents ou de descripteurs.</p>
            </abstract>
          </profileDesc>
        </biblFull>
      </listBibl>
    </body>
    <back>
      <listOrg type="structures">
        <org type="researchteam" xml:id="struct-409262" status="OLD">
          <orgName>ADVanced Analytics for data SciencE</orgName>
          <orgName type="acronym">ADVANSE</orgName>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>LIRMM, 161 rue Ada, 34000 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.lirmm.fr/equipes/ADVANSE/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-181" type="direct"/>
            <relation name="UMR5506" active="#struct-410122" type="indirect"/>
            <relation name="UMR5506" active="#struct-441569" type="indirect"/>
          </listRelation>
        </org>
        <org type="laboratory" xml:id="struct-23810" status="VALID">
          <orgName>Itesoft R&amp;D</orgName>
          <desc>
            <address>
              <addrLine>Parc d'Andron - Le Sequoïa 30470 Aimargues</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.itesoft.fr</ref>
          </desc>
          <listRelation>
            <relation active="#struct-365824" type="direct"/>
          </listRelation>
        </org>
        <org type="laboratory" xml:id="struct-568971" status="OLD">
          <orgName>Territoires, Environnement, Télédétection et Information Spatiale</orgName>
          <orgName type="acronym">UMR TETIS</orgName>
          <date type="start">2012-01-01</date>
          <date type="end">2015-12-31</date>
          <desc>
            <address>
              <addrLine>Maison de la télédétection - 500 rue Jean-François Breton - 34093 Montpellier Cedex 5</addrLine>
              <country key="FR"/>
            </address>
          </desc>
          <listRelation>
            <relation active="#struct-11574" type="direct"/>
            <relation active="#struct-148117" type="direct"/>
            <relation name="UMR1470" active="#struct-302049" type="direct"/>
          </listRelation>
        </org>
        <org type="laboratory" xml:id="struct-181" status="OLD">
          <idno type="IdRef">139590827</idno>
          <idno type="ISNI">0000000405990488</idno>
          <idno type="RNSR">199111950H</idno>
          <idno type="ROR">https://ror.org/013yean28</idno>
          <orgName>Laboratoire d'Informatique de Robotique et de Microélectronique de Montpellier</orgName>
          <orgName type="acronym">LIRMM</orgName>
          <date type="start">1995-01-01</date>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>161 rue Ada - 34095 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.lirmm.fr</ref>
          </desc>
          <listRelation>
            <relation name="UMR5506" active="#struct-410122" type="direct"/>
            <relation name="UMR5506" active="#struct-441569" type="direct"/>
          </listRelation>
        </org>
        <org type="institution" xml:id="struct-410122" status="OLD">
          <idno type="ISNI">0000000120970141</idno>
          <idno type="ROR">https://ror.org/051escj72</idno>
          <orgName>Université de Montpellier</orgName>
          <orgName type="acronym">UM</orgName>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>163 rue Auguste Broussonnet - 34090 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.umontpellier.fr/</ref>
          </desc>
        </org>
        <org type="regroupinstitution" xml:id="struct-441569" status="VALID">
          <idno type="IdRef">02636817X</idno>
          <idno type="ISNI">0000000122597504</idno>
          <idno type="ROR">https://ror.org/02feahw73</idno>
          <orgName>Centre National de la Recherche Scientifique</orgName>
          <orgName type="acronym">CNRS</orgName>
          <date type="start">1939-10-19</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.cnrs.fr/</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-365824" status="INCOMING">
          <orgName>ITESOFT</orgName>
          <desc>
            <address>
              <country key="FR"/>
            </address>
          </desc>
        </org>
        <org type="institution" xml:id="struct-11574" status="VALID">
          <idno type="ISNI">0000000121539871</idno>
          <idno type="ROR">https://ror.org/05kpkpg04</idno>
          <orgName>Centre de Coopération Internationale en Recherche Agronomique pour le Développement</orgName>
          <orgName type="acronym">Cirad</orgName>
          <date type="start">1984-06-01</date>
          <desc>
            <address>
              <addrLine>Siège 42, rue Scheffer 75116 Paris</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.cirad.fr</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-148117" status="VALID">
          <idno type="IdRef">139408088</idno>
          <idno type="ROR">https://ror.org/02kbmgc12</idno>
          <orgName>AgroParisTech</orgName>
          <date type="start">2007-01-01</date>
          <desc>
            <address>
              <addrLine>22 place de l'Agronomie CS 20040 91123 Palaiseau cedex</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.agroparistech.fr/</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-302049" status="OLD">
          <idno type="ROR">https://ror.org/01wep6g48</idno>
          <orgName>Institut national de recherche en sciences et technologies pour l'environnement et l'agriculture</orgName>
          <orgName type="acronym">IRSTEA</orgName>
          <date type="start">2012-01-01</date>
          <date type="end">2019-12-31</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.irstea.fr</ref>
          </desc>
        </org>
      </listOrg>
    </back>
  </text>
</TEI>