<?xml version="1.0" encoding="utf-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:hal="http://hal.archives-ouvertes.fr/" xmlns:gml="http://www.opengis.net/gml/3.3/" xmlns:gmlce="http://www.opengis.net/gml/3.3/ce" version="1.1" xsi:schemaLocation="http://www.tei-c.org/ns/1.0 http://api.archives-ouvertes.fr/documents/aofr-sword.xsd">
  <teiHeader>
    <fileDesc>
      <titleStmt>
        <title>HAL TEI export of lirmm-01239231</title>
      </titleStmt>
      <publicationStmt>
        <distributor>CCSD</distributor>
        <availability status="restricted">
          <licence target="https://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0 - Universal</licence>
        </availability>
        <date when="2026-05-06T12:59:44+02:00"/>
      </publicationStmt>
      <sourceDesc>
        <p part="N">HAL API Platform</p>
      </sourceDesc>
    </fileDesc>
  </teiHeader>
  <text>
    <body>
      <listBibl>
        <biblFull>
          <titleStmt>
            <title xml:lang="en">Semantic-Based Multilingual Document Clustering via Tensor Modeling</title>
            <author role="crp">
              <persName>
                <forename type="first">Salvatore</forename>
                <surname>Romeo</surname>
              </persName>
              <idno type="idhal" notation="numeric">973605</idno>
              <idno type="halauthorid" notation="string">972691-973605</idno>
              <affiliation ref="#struct-242521"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Andrea</forename>
                <surname>Tagarelli</surname>
              </persName>
              <idno type="halauthorid">863521-0</idno>
              <affiliation ref="#struct-242521"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Dino</forename>
                <surname>Ienco</surname>
              </persName>
              <email type="md5">b326eb4334c6ba52cdd2279add798afc</email>
              <email type="domain">inrae.fr</email>
              <idno type="idhal" notation="string">dino-ienco</idno>
              <idno type="idhal" notation="numeric">6226</idno>
              <idno type="halauthorid" notation="string">23855-6226</idno>
              <idno type="ORCID">https://orcid.org/0000-0002-8736-3132</idno>
              <idno type="IDREF">https://www.idref.fr/172688183</idno>
              <idno type="GOOGLE SCHOLAR">https://scholar.google.fr/citations?user=C8zfH3kAAAAJ</idno>
              <affiliation ref="#struct-568971"/>
              <affiliation ref="#struct-409262"/>
            </author>
            <editor role="depositor">
              <persName>
                <forename>Dino</forename>
                <surname>Ienco</surname>
              </persName>
              <email type="md5">b326eb4334c6ba52cdd2279add798afc</email>
              <email type="domain">inrae.fr</email>
            </editor>
          </titleStmt>
          <editionStmt>
            <edition n="v1" type="current">
              <date type="whenSubmitted">2015-12-07 15:23:53</date>
              <date type="whenModified">2025-03-21 15:14:31</date>
              <date type="whenReleased">2015-12-08 11:02:48</date>
              <date type="whenProduced">2014-10-25</date>
              <date type="whenEndEmbargoed">2015-12-07</date>
              <ref type="file" target="https://hal-lirmm.ccsd.cnrs.fr/lirmm-01239231v1/document">
                <date notBefore="2015-12-07"/>
              </ref>
              <ref type="file" subtype="author" n="1" target="https://hal-lirmm.ccsd.cnrs.fr/lirmm-01239231v1/file/585_Paper.pdf" id="file-1239231-1317327">
                <date notBefore="2015-12-07"/>
              </ref>
              <ref type="externalLink" target="https://doi.org/10.3115/v1/d14-1065"/>
            </edition>
            <respStmt>
              <resp>contributor</resp>
              <name key="327541">
                <persName>
                  <forename>Dino</forename>
                  <surname>Ienco</surname>
                </persName>
                <email type="md5">b326eb4334c6ba52cdd2279add798afc</email>
                <email type="domain">inrae.fr</email>
              </name>
            </respStmt>
          </editionStmt>
          <publicationStmt>
            <distributor>CCSD</distributor>
            <idno type="halId">lirmm-01239231</idno>
            <idno type="halUri">https://hal-lirmm.ccsd.cnrs.fr/lirmm-01239231</idno>
            <idno type="halBibtex">romeo:lirmm-01239231</idno>
            <idno type="halRefHtml">&lt;i&gt;EMNLP: Empirical Methods in Natural Language Processing&lt;/i&gt;, Oct 2014, Doha, Qatar. pp.600-609, &lt;a target="_blank" href="https://dx.doi.org/10.3115/v1/D14-1065"&gt;&amp;#x27E8;10.3115/v1/D14-1065&amp;#x27E9;&lt;/a&gt;</idno>
            <idno type="halRef">EMNLP: Empirical Methods in Natural Language Processing, Oct 2014, Doha, Qatar. pp.600-609, &amp;#x27E8;10.3115/v1/D14-1065&amp;#x27E9;</idno>
            <availability status="restricted">
              <licence target="https://about.hal.science/hal-authorisation-v1/">HAL Authorization<ref corresp="#file-1239231-1317327"/></licence>
            </availability>
          </publicationStmt>
          <seriesStmt>
            <idno type="stamp" n="CIRAD">CIRAD - Centre de coopération internationale en recherche agronomique pour le développement</idno>
            <idno type="stamp" n="AGROPARISTECH">AgroParisTech</idno>
            <idno type="stamp" n="CNRS">CNRS - Centre national de la recherche scientifique</idno>
            <idno type="stamp" n="IRSTEA">IRSTEA - Institut national de recherche en sciences et technologies pour l'environnement et l'agriculture (&lt;b&gt;anciennement Cemagref&lt;/b&gt;)</idno>
            <idno type="stamp" n="ADVANSE" corresp="LIRMM">ADVanced Analytics for data SciencE</idno>
            <idno type="stamp" n="LIRMM">Laboratoire d'Informatique de Robotique et de Microélectronique de Montpellier</idno>
            <idno type="stamp" n="TETIS">TETIS</idno>
            <idno type="stamp" n="AGREENIUM">Archive ouverte en agrobiosciences</idno>
            <idno type="stamp" n="MIPS">Mathématiques, Informatique, Physique et Systèmes</idno>
            <idno type="stamp" n="UNIV-MONTPELLIER">Université de Montpellier</idno>
            <idno type="stamp" n="INRAE">Institut National de Recherche en Agriculture, Alimentation et Environnement</idno>
            <idno type="stamp" n="INRAEOCCITANIEMONTPELLIER" corresp="INRAE">INRAE Occitanie Montpellier</idno>
            <idno type="stamp" n="UM-2015-2021" corresp="UNIV-MONTPELLIER">Université de Montpellier (2015-2021)</idno>
            <idno type="stamp" n="MATHNUM">Département MathNum</idno>
            <idno type="stamp" n="TEST-MATHNUM">Test MathNum</idno>
          </seriesStmt>
          <notesStmt>
            <note type="audience" n="2">International</note>
            <note type="invited" n="0">No</note>
            <note type="popular" n="0">No</note>
            <note type="peer" n="1">Yes</note>
            <note type="proceedings" n="1">Yes</note>
          </notesStmt>
          <sourceDesc>
            <biblStruct>
              <analytic>
                <title xml:lang="en">Semantic-Based Multilingual Document Clustering via Tensor Modeling</title>
                <author role="crp">
                  <persName>
                    <forename type="first">Salvatore</forename>
                    <surname>Romeo</surname>
                  </persName>
                  <idno type="idhal" notation="numeric">973605</idno>
                  <idno type="halauthorid" notation="string">972691-973605</idno>
                  <affiliation ref="#struct-242521"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Andrea</forename>
                    <surname>Tagarelli</surname>
                  </persName>
                  <idno type="halauthorid">863521-0</idno>
                  <affiliation ref="#struct-242521"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Dino</forename>
                    <surname>Ienco</surname>
                  </persName>
                  <email type="md5">b326eb4334c6ba52cdd2279add798afc</email>
                  <email type="domain">inrae.fr</email>
                  <idno type="idhal" notation="string">dino-ienco</idno>
                  <idno type="idhal" notation="numeric">6226</idno>
                  <idno type="halauthorid" notation="string">23855-6226</idno>
                  <idno type="ORCID">https://orcid.org/0000-0002-8736-3132</idno>
                  <idno type="IDREF">https://www.idref.fr/172688183</idno>
                  <idno type="GOOGLE SCHOLAR">https://scholar.google.fr/citations?user=C8zfH3kAAAAJ</idno>
                  <affiliation ref="#struct-568971"/>
                  <affiliation ref="#struct-409262"/>
                </author>
              </analytic>
              <monogr>
                <title level="m">Conference on Empirical Methods in Natural Language Processing</title>
                <meeting>
                  <title>EMNLP: Empirical Methods in Natural Language Processing</title>
                  <date type="start">2014-10-25</date>
                  <date type="end">2014-10-29</date>
                  <settlement>Doha</settlement>
                  <country key="QA">Qatar</country>
                </meeting>
                <imprint>
                  <biblScope unit="pp">600-609</biblScope>
                  <date type="datePub">2014</date>
                </imprint>
              </monogr>
              <idno type="doi">10.3115/v1/D14-1065</idno>
            </biblStruct>
          </sourceDesc>
          <profileDesc>
            <langUsage>
              <language ident="en">English</language>
            </langUsage>
            <textClass>
              <classCode scheme="halDomain" n="info.info-ir">Computer Science [cs]/Information Retrieval [cs.IR]</classCode>
              <classCode scheme="halDomain" n="info.info-lg">Computer Science [cs]/Machine Learning [cs.LG]</classCode>
              <classCode scheme="halDomain" n="info.info-db">Computer Science [cs]/Databases [cs.DB]</classCode>
              <classCode scheme="halTypology" n="COMM">Conference papers</classCode>
              <classCode scheme="halOldTypology" n="COMM">Conference papers</classCode>
              <classCode scheme="halTreeTypology" n="COMM">Conference papers</classCode>
            </textClass>
            <abstract xml:lang="en">
              <p>A major challenge in document clustering research arises from the growing amount of text data written in different languages. Previous approaches depend on language-specific solutions (e.g., bilingual dictionaries, sequential machine translation) to evaluate document similarities, and the required transformations may alter the original document semantics. To cope with this issue we propose a new document clustering approach for multilingual corpora that (i) exploits a large-scale multilingual knowledge base, (ii) takes advantage of the multi-topic nature of the text documents, and (iii) employs a tensor-based model to deal with high dimensionality and sparseness. Results have shown the significance of our approach and its better performance w.r.t. classic document clustering approaches, in both a balanced and an unbalanced corpus evaluation.</p>
            </abstract>
          </profileDesc>
        </biblFull>
      </listBibl>
    </body>
    <back>
      <listOrg type="structures">
        <org type="laboratory" xml:id="struct-242521" status="VALID">
          <orgName>Dipartimento di Ingegneria Informatica, Modellistica, Elettronica e Sistemistica [Calabria]</orgName>
          <orgName type="acronym">DIMES</orgName>
          <desc>
            <address>
              <country key="IT"/>
            </address>
            <ref type="url">http://www.dimes.unical.it/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-103704" type="direct"/>
          </listRelation>
        </org>
        <org type="laboratory" xml:id="struct-568971" status="OLD">
          <orgName>Territoires, Environnement, Télédétection et Information Spatiale</orgName>
          <orgName type="acronym">UMR TETIS</orgName>
          <date type="start">2012-01-01</date>
          <date type="end">2015-12-31</date>
          <desc>
            <address>
              <addrLine>Maison de la télédétection - 500 rue Jean-François Breton - 34093 Montpellier Cedex 5</addrLine>
              <country key="FR"/>
            </address>
          </desc>
          <listRelation>
            <relation active="#struct-11574" type="direct"/>
            <relation active="#struct-148117" type="direct"/>
            <relation name="UMR1470" active="#struct-302049" type="direct"/>
          </listRelation>
        </org>
        <org type="researchteam" xml:id="struct-409262" status="OLD">
          <orgName>ADVanced Analytics for data SciencE</orgName>
          <orgName type="acronym">ADVANSE</orgName>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>LIRMM, 161 rue Ada, 34000 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.lirmm.fr/equipes/ADVANSE/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-181" type="direct"/>
            <relation name="UMR5506" active="#struct-410122" type="indirect"/>
            <relation name="UMR5506" active="#struct-441569" type="indirect"/>
          </listRelation>
        </org>
        <org type="institution" xml:id="struct-103704" status="VALID">
          <idno type="IdRef">028882695</idno>
          <idno type="ISNI">0000000122897750</idno>
          <idno type="ROR">https://ror.org/02rc97e94</idno>
          <idno type="Wikidata">Q1752540</idno>
          <orgName>Università della Calabria [Arcavacata di Rende, Italia] = University of Calabria [Italy] = Université de Calabre [Italie]</orgName>
          <orgName type="acronym">UniCal</orgName>
          <desc>
            <address>
              <addrLine>Campus di Arcavacata via Pietro Bucci 87036 Arcavacata di Rende (CS)</addrLine>
              <country key="IT"/>
            </address>
            <ref type="url">http://www.unical.it/portale/</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-11574" status="VALID">
          <idno type="ISNI">0000000121539871</idno>
          <idno type="ROR">https://ror.org/05kpkpg04</idno>
          <orgName>Centre de Coopération Internationale en Recherche Agronomique pour le Développement</orgName>
          <orgName type="acronym">Cirad</orgName>
          <date type="start">1984-06-01</date>
          <desc>
            <address>
              <addrLine>Siège 42, rue Scheffer 75116 Paris</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.cirad.fr</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-148117" status="VALID">
          <idno type="IdRef">139408088</idno>
          <idno type="ROR">https://ror.org/02kbmgc12</idno>
          <orgName>AgroParisTech</orgName>
          <date type="start">2007-01-01</date>
          <desc>
            <address>
              <addrLine>22 place de l'Agronomie CS 20040 91123 Palaiseau cedex</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.agroparistech.fr/</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-302049" status="OLD">
          <idno type="ROR">https://ror.org/01wep6g48</idno>
          <orgName>Institut national de recherche en sciences et technologies pour l'environnement et l'agriculture</orgName>
          <orgName type="acronym">IRSTEA</orgName>
          <date type="start">2012-01-01</date>
          <date type="end">2019-12-31</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.irstea.fr</ref>
          </desc>
        </org>
        <org type="laboratory" xml:id="struct-181" status="OLD">
          <idno type="IdRef">139590827</idno>
          <idno type="ISNI">0000000405990488</idno>
          <idno type="RNSR">199111950H</idno>
          <idno type="ROR">https://ror.org/013yean28</idno>
          <orgName>Laboratoire d'Informatique de Robotique et de Microélectronique de Montpellier</orgName>
          <orgName type="acronym">LIRMM</orgName>
          <date type="start">1995-01-01</date>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>161 rue Ada - 34095 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.lirmm.fr</ref>
          </desc>
          <listRelation>
            <relation name="UMR5506" active="#struct-410122" type="direct"/>
            <relation name="UMR5506" active="#struct-441569" type="direct"/>
          </listRelation>
        </org>
        <org type="institution" xml:id="struct-410122" status="OLD">
          <idno type="ISNI">0000000120970141</idno>
          <idno type="ROR">https://ror.org/051escj72</idno>
          <orgName>Université de Montpellier</orgName>
          <orgName type="acronym">UM</orgName>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>163 rue Auguste Broussonnet - 34090 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.umontpellier.fr/</ref>
          </desc>
        </org>
        <org type="regroupinstitution" xml:id="struct-441569" status="VALID">
          <idno type="IdRef">02636817X</idno>
          <idno type="ISNI">0000000122597504</idno>
          <idno type="ROR">https://ror.org/02feahw73</idno>
          <orgName>Centre National de la Recherche Scientifique</orgName>
          <orgName type="acronym">CNRS</orgName>
          <date type="start">1939-10-19</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.cnrs.fr/</ref>
          </desc>
        </org>
      </listOrg>
    </back>
  </text>
</TEI>