<?xml version="1.0" encoding="utf-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:hal="http://hal.archives-ouvertes.fr/" xmlns:gml="http://www.opengis.net/gml/3.3/" xmlns:gmlce="http://www.opengis.net/gml/3.3/ce" version="1.1" xsi:schemaLocation="http://www.tei-c.org/ns/1.0 http://api.archives-ouvertes.fr/documents/aofr-sword.xsd">
  <teiHeader>
    <fileDesc>
      <titleStmt>
        <title>HAL TEI export of lirmm-00269698</title>
      </titleStmt>
      <publicationStmt>
        <distributor>CCSD</distributor>
        <availability status="restricted">
          <licence target="https://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0 - Universal</licence>
        </availability>
        <date when="2026-05-03T02:33:58+02:00"/>
      </publicationStmt>
      <sourceDesc>
        <p part="N">HAL API Platform</p>
      </sourceDesc>
    </fileDesc>
  </teiHeader>
  <text>
    <body>
      <listBibl>
        <biblFull>
          <titleStmt>
            <title xml:lang="fr">Données semi-structurées. Découverte, maintenance et analyse de tendances</title>
            <author role="aut">
              <persName>
                <forename type="first">Pierre-Alain</forename>
                <surname>Laur</surname>
              </persName>
              <idno type="halauthorid">491608-0</idno>
              <affiliation ref="#struct-181"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Maguelonne</forename>
                <surname>Teisseire</surname>
              </persName>
              <email type="md5">bfed7f55123bad5a6ddc404f64f1a920</email>
              <email type="domain">teledetection.fr</email>
              <idno type="idhal" notation="string">maguelonne-teisseire</idno>
              <idno type="idhal" notation="numeric">8645</idno>
              <idno type="halauthorid" notation="string">24802-8645</idno>
              <idno type="ORCID">https://orcid.org/0000-0001-9313-6414</idno>
              <idno type="IDREF">https://www.idref.fr/117436593</idno>
              <idno type="VIAF">https://viaf.org/viaf/164498326</idno>
              <idno type="ISNI">http://isni.org/isni/0000000117179295</idno>
              <affiliation ref="#struct-388310"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Pascal</forename>
                <surname>Poncelet</surname>
              </persName>
              <email type="md5">a01013fff28244b30818d056a90083f1</email>
              <email type="domain">ema.fr</email>
              <idno type="idhal" notation="string">pascal-poncelet</idno>
              <idno type="idhal" notation="numeric">6247</idno>
              <idno type="halauthorid" notation="string">23856-6247</idno>
              <idno type="ORCID">https://orcid.org/0000-0002-8277-3490</idno>
              <idno type="IDREF">https://www.idref.fr/069260613</idno>
              <affiliation ref="#struct-214"/>
            </author>
            <editor role="depositor">
              <persName>
                <forename>Christine</forename>
                <surname>Carvalho De Matos</surname>
              </persName>
              <email type="md5">10103945d6df12b14430343989bb0f6f</email>
              <email type="domain">lirmm.fr</email>
            </editor>
          </titleStmt>
          <editionStmt>
            <edition n="v1" type="current">
              <date type="whenSubmitted">2018-11-03 22:50:49</date>
              <date type="whenModified">2026-02-12 08:38:01</date>
              <date type="whenReleased">2018-11-03 23:10:29</date>
              <date type="whenProduced">2003</date>
              <date type="whenEndEmbargoed">2018-11-03</date>
              <ref type="file" target="https://hal-lirmm.ccsd.cnrs.fr/lirmm-00269698v1/document">
                <date notBefore="2018-11-03"/>
              </ref>
              <ref type="file" subtype="author" n="1" target="https://hal-lirmm.ccsd.cnrs.fr/lirmm-00269698v1/file/5c34b98ccd04839fa10f47aec91a0fca3c53.pdf" id="file-1911819-1942806">
                <date notBefore="2018-11-03"/>
              </ref>
            </edition>
            <respStmt>
              <resp>contributor</resp>
              <name key="103102">
                <persName>
                  <forename>Christine</forename>
                  <surname>Carvalho De Matos</surname>
                </persName>
                <email type="md5">10103945d6df12b14430343989bb0f6f</email>
                <email type="domain">lirmm.fr</email>
              </name>
            </respStmt>
          </editionStmt>
          <publicationStmt>
            <distributor>CCSD</distributor>
            <idno type="halId">lirmm-00269698</idno>
            <idno type="halUri">https://hal-lirmm.ccsd.cnrs.fr/lirmm-00269698</idno>
            <idno type="halBibtex">laur:lirmm-00269698</idno>
            <idno type="halRefHtml">&lt;i&gt;Revue des Sciences et Technologies de l'Information - Série ISI : Ingénierie des Systèmes d'Information&lt;/i&gt;, 2003, 8 (5-6), pp.49-78. &lt;a target="_blank" href="https://dx.doi.org/10.3166/isi.8.5-6.49-78"&gt;&amp;#x27E8;10.3166/isi.8.5-6.49-78&amp;#x27E9;&lt;/a&gt;</idno>
            <idno type="halRef">Revue des Sciences et Technologies de l'Information - Série ISI : Ingénierie des Systèmes d'Information, 2003, 8 (5-6), pp.49-78. &amp;#x27E8;10.3166/isi.8.5-6.49-78&amp;#x27E9;</idno>
            <availability status="restricted">
              <licence target="https://about.hal.science/hal-authorisation-v1/">HAL Authorization<ref corresp="#file-1911819-1942806"/></licence>
            </availability>
          </publicationStmt>
          <seriesStmt>
            <idno type="stamp" n="CNRS">CNRS - Centre national de la recherche scientifique</idno>
            <idno type="stamp" n="EM-ALES">Ecole des Mines d'Alès</idno>
            <idno type="stamp" n="LIRMM">Laboratoire d'Informatique de Robotique et de Microélectronique de Montpellier</idno>
            <idno type="stamp" n="MIPS">Mathématiques, Informatique, Physique et Systèmes</idno>
            <idno type="stamp" n="UNIV-MONTPELLIER">Université de Montpellier</idno>
            <idno type="stamp" n="INSTITUTS-TELECOM">composantes instituts telecom </idno>
            <idno type="stamp" n="UM-2015-2021" corresp="UNIV-MONTPELLIER">Université de Montpellier (2015-2021)</idno>
            <idno type="stamp" n="TEST-MATHNUM">Test MathNum</idno>
            <idno type="stamp" n="IMT-MINES-ALES" corresp="INSTITUT-MINES-TELECOM">IMT Mines Alès</idno>
            <idno type="stamp" n="INSTITUT-MINES-TELECOM">Institut Mines Telecom</idno>
          </seriesStmt>
          <notesStmt>
            <note type="commentary">Numéro Spécial "Bases de Données Semi-Structurées"</note>
            <note type="audience" n="3">National</note>
            <note type="popular" n="0">No</note>
            <note type="peer" n="1">Yes</note>
          </notesStmt>
          <sourceDesc>
            <biblStruct>
              <analytic>
                <title xml:lang="fr">Données semi-structurées. Découverte, maintenance et analyse de tendances</title>
                <author role="aut">
                  <persName>
                    <forename type="first">Pierre-Alain</forename>
                    <surname>Laur</surname>
                  </persName>
                  <idno type="halauthorid">491608-0</idno>
                  <affiliation ref="#struct-181"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Maguelonne</forename>
                    <surname>Teisseire</surname>
                  </persName>
                  <email type="md5">bfed7f55123bad5a6ddc404f64f1a920</email>
                  <email type="domain">teledetection.fr</email>
                  <idno type="idhal" notation="string">maguelonne-teisseire</idno>
                  <idno type="idhal" notation="numeric">8645</idno>
                  <idno type="halauthorid" notation="string">24802-8645</idno>
                  <idno type="ORCID">https://orcid.org/0000-0001-9313-6414</idno>
                  <idno type="IDREF">https://www.idref.fr/117436593</idno>
                  <idno type="VIAF">https://viaf.org/viaf/164498326</idno>
                  <idno type="ISNI">http://isni.org/isni/0000000117179295</idno>
                  <affiliation ref="#struct-388310"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Pascal</forename>
                    <surname>Poncelet</surname>
                  </persName>
                  <email type="md5">a01013fff28244b30818d056a90083f1</email>
                  <email type="domain">ema.fr</email>
                  <idno type="idhal" notation="string">pascal-poncelet</idno>
                  <idno type="idhal" notation="numeric">6247</idno>
                  <idno type="halauthorid" notation="string">23856-6247</idno>
                  <idno type="ORCID">https://orcid.org/0000-0002-8277-3490</idno>
                  <idno type="IDREF">https://www.idref.fr/069260613</idno>
                  <affiliation ref="#struct-214"/>
                </author>
              </analytic>
              <monogr>
                <idno type="halJournalId" status="VALID">20906</idno>
                <idno type="issn">1633-1311</idno>
                <idno type="eissn">2116-7125</idno>
                <title level="j">Revue des Sciences et Technologies de l'Information - Série ISI : Ingénierie des Systèmes d'Information</title>
                <imprint>
                  <publisher>Lavoisier</publisher>
                  <biblScope unit="volume">8</biblScope>
                  <biblScope unit="issue">5-6</biblScope>
                  <biblScope unit="pp">49-78</biblScope>
                  <date type="datePub">2003</date>
                </imprint>
              </monogr>
              <idno type="doi">10.3166/isi.8.5-6.49-78</idno>
            </biblStruct>
          </sourceDesc>
          <profileDesc>
            <langUsage>
              <language ident="fr">French</language>
            </langUsage>
            <textClass>
              <keywords scheme="author">
                <term xml:lang="en">Semi structured data</term>
                <term xml:lang="en">Knowledge discovery</term>
                <term xml:lang="en">Data sources evolutions</term>
                <term xml:lang="en">Trends</term>
                <term xml:lang="fr">Données semi-structurées</term>
                <term xml:lang="fr">Extraction de connaissances</term>
                <term xml:lang="fr">Évolutions des sources de données</term>
                <term xml:lang="fr">Tendances</term>
              </keywords>
              <classCode scheme="halDomain" n="info.info-db">Computer Science [cs]/Databases [cs.DB]</classCode>
              <classCode scheme="halTypology" n="ART">Journal articles</classCode>
              <classCode scheme="halOldTypology" n="ART">Journal articles</classCode>
              <classCode scheme="halTreeTypology" n="ART">Journal articles</classCode>
            </textClass>
            <abstract xml:lang="fr">
              <p>La recherche de connaissances dans des données structurées a fait l'objet de nombreux travaux de recherche ces dernières années. Cependant, avec la popularité du Web, le nombre de documents semi structurés augmente très rapidement et il est judicieux de penser qu'une requête sur la structure des documents devient aussi importante qu'une requête sur les données elles mêmes. Dans cet article nous proposons une approche pour extraire de telles sous structures. De plus, les données évoluant sans cesse, nous étendons l'approche pour prendre en compte l'évolution de ces données sources dans le cadre d'un processus d'extraction. Enfin, nous montrons qu'il est possible d'analyser finement les tendances au cours des différentes évolutions des données sources. ABSTRACT. Mining knowledge from structured data has been extensively addressed in the few past years. However, with the growing popularity of the Web, the number of semi structured documents available is rapidly increasing and it is judicious to assume that a query on document structure is almost as important as a query on data. In this paper, we propose an approach to extract such structures. Moreover, manipulated data is constantly being updated; we extend our approach to take into account source evolutions in a knowledge extraction process. Finally, we show that it is possible to analyze trends during the different data sources evolutions. MOTS-CLÉS : données semi structurées, extraction de connaissances, évolutions des sources de données, tendances.</p>
            </abstract>
          </profileDesc>
        </biblFull>
      </listBibl>
    </body>
    <back>
      <listOrg type="structures">
        <org type="laboratory" xml:id="struct-181" status="OLD">
          <idno type="IdRef">139590827</idno>
          <idno type="ISNI">0000000405990488</idno>
          <idno type="RNSR">199111950H</idno>
          <idno type="ROR">https://ror.org/013yean28</idno>
          <orgName>Laboratoire d'Informatique de Robotique et de Microélectronique de Montpellier</orgName>
          <orgName type="acronym">LIRMM</orgName>
          <date type="start">1995-01-01</date>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>161 rue Ada - 34095 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.lirmm.fr</ref>
          </desc>
          <listRelation>
            <relation name="UMR5506" active="#struct-410122" type="direct"/>
            <relation name="UMR5506" active="#struct-441569" type="direct"/>
          </listRelation>
        </org>
        <org type="researchteam" xml:id="struct-388310" status="OLD">
          <orgName>Fouille de données environnementales</orgName>
          <orgName type="acronym">TATOO</orgName>
          <date type="end">2013</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
          </desc>
          <listRelation>
            <relation active="#struct-181" type="direct"/>
            <relation name="UMR5506" active="#struct-410122" type="indirect"/>
            <relation name="UMR5506" active="#struct-441569" type="indirect"/>
          </listRelation>
        </org>
        <org type="laboratory" xml:id="struct-214" status="OLD">
          <idno type="IdRef">071473114</idno>
          <idno type="ISNI">0000 0004 0372 9024</idno>
          <idno type="RNSR">199420609G</idno>
          <orgName>Laboratoire de Génie Informatique et Ingénierie de Production</orgName>
          <orgName type="acronym">LGI2P</orgName>
          <date type="start">1994-01-01</date>
          <date type="end">2019-12-31</date>
          <desc>
            <address>
              <addrLine>Ecole des Mines d'Alès-Laboratoire de Génie Informatique et d'Ingénierie de ProductionSite de Croupillac7 rue Jules RenardAlès - 30319 CedexFrance</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.lgi2p.ema.fr/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-6279" type="direct"/>
            <relation active="#struct-302102" type="indirect"/>
          </listRelation>
        </org>
        <org type="institution" xml:id="struct-410122" status="OLD">
          <idno type="ISNI">0000000120970141</idno>
          <idno type="ROR">https://ror.org/051escj72</idno>
          <orgName>Université de Montpellier</orgName>
          <orgName type="acronym">UM</orgName>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>163 rue Auguste Broussonnet - 34090 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.umontpellier.fr/</ref>
          </desc>
        </org>
        <org type="regroupinstitution" xml:id="struct-441569" status="VALID">
          <idno type="IdRef">02636817X</idno>
          <idno type="ISNI">0000000122597504</idno>
          <idno type="ROR">https://ror.org/02feahw73</idno>
          <orgName>Centre National de la Recherche Scientifique</orgName>
          <orgName type="acronym">CNRS</orgName>
          <date type="start">1939-10-19</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.cnrs.fr/</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-6279" status="VALID">
          <idno type="IdRef">032486111</idno>
          <idno type="ISNI">0000 0000 9734 247X</idno>
          <idno type="ROR">https://ror.org/03e8rf594</idno>
          <orgName>IMT MINES ALÈS</orgName>
          <date type="start">1843-09-22</date>
          <desc>
            <address>
              <addrLine>IMT - Mines Ales École des MinesSite de Clavières : 6 avenue de Clavières, 30319 Alès CedexSite de Croupillac : Rue Jules Renard, 30100 AlèsSite de Pau : Hélioparc (bâtiment Einstein 1), 2 avenue du Président-Pierre-Angot, 64053 Pau Cedex 9</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.mines-ales.fr/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-302102" type="direct"/>
          </listRelation>
        </org>
        <org type="regroupinstitution" xml:id="struct-302102" status="VALID">
          <idno type="IdRef">192427156</idno>
          <idno type="ISNI">000000012202567X</idno>
          <idno type="ROR">https://ror.org/025vp2923</idno>
          <idno type="Wikidata">Q27962533</idno>
          <orgName>Institut Mines-Télécom [Paris]</orgName>
          <orgName type="acronym">IMT</orgName>
          <date type="start">2012-03-01</date>
          <desc>
            <address>
              <addrLine>19 Place Marguerite Perey, 91120 Palaiseau</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.imt.fr/</ref>
          </desc>
        </org>
      </listOrg>
    </back>
  </text>
</TEI>