<?xml version="1.0" encoding="utf-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:hal="http://hal.archives-ouvertes.fr/" xmlns:gml="http://www.opengis.net/gml/3.3/" xmlns:gmlce="http://www.opengis.net/gml/3.3/ce" version="1.1" xsi:schemaLocation="http://www.tei-c.org/ns/1.0 http://api.archives-ouvertes.fr/documents/aofr-sword.xsd">
  <teiHeader>
    <fileDesc>
      <titleStmt>
        <title>HAL TEI export of lirmm-00950983v1</title>
      </titleStmt>
      <publicationStmt>
        <distributor>CCSD</distributor>
        <availability status="restricted">
          <licence target="https://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0 - Universal</licence>
        </availability>
        <date when="2026-05-06T19:18:57+02:00"/>
      </publicationStmt>
      <sourceDesc>
        <p part="N">HAL API Platform</p>
      </sourceDesc>
    </fileDesc>
  </teiHeader>
  <text>
    <body>
      <listBibl>
        <biblFull>
          <titleStmt>
            <title xml:lang="en">From Indexing Data Structures to de Bruijn Graphs</title>
            <author role="aut">
              <persName>
                <forename type="first">Bastien</forename>
                <surname>Cazaux</surname>
              </persName>
              <email type="md5">9801dd5f380a8ef2148c88eb68f25fc8</email>
              <email type="domain">lirmm.fr</email>
              <idno type="idhal" notation="string">cazaux-bastien</idno>
              <idno type="idhal" notation="numeric">3129</idno>
              <idno type="halauthorid" notation="string">23494-3129</idno>
              <idno type="IDREF">https://www.idref.fr/226877523</idno>
              <affiliation ref="#struct-388224"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Thierry</forename>
                <surname>Lecroq</surname>
              </persName>
              <email type="md5">73a06c28c0c1674df8a2929677efdf11</email>
              <email type="domain">univ-rouen.fr</email>
              <idno type="idhal" notation="string">thierry-lecroq</idno>
              <idno type="idhal" notation="numeric">21817</idno>
              <idno type="halauthorid" notation="string">10003-21817</idno>
              <idno type="ORCID">https://orcid.org/0000-0002-1900-3397</idno>
              <idno type="IDREF">https://www.idref.fr/059058463</idno>
              <affiliation ref="#struct-23832"/>
            </author>
            <author role="crp">
              <persName>
                <forename type="first">Eric</forename>
                <surname>Rivals</surname>
              </persName>
              <email type="md5">2948fa0e637f8fdead28884bcc311d7f</email>
              <email type="domain">lirmm.fr</email>
              <idno type="idhal" notation="string">eric-rivals</idno>
              <idno type="idhal" notation="numeric">2002</idno>
              <idno type="halauthorid" notation="string">23559-2002</idno>
              <idno type="ORCID">https://orcid.org/0000-0003-3791-3973</idno>
              <idno type="IDREF">https://www.idref.fr/118021850</idno>
              <affiliation ref="#struct-213159"/>
              <affiliation ref="#struct-388224"/>
            </author>
            <editor role="depositor">
              <persName>
                <forename>Eric</forename>
                <surname>Rivals</surname>
              </persName>
              <email type="md5">18b1a627af6df8b912fd128ee2d82dee</email>
              <email type="domain">lirmm.fr</email>
            </editor>
            <funder ref="#projanr-23506"/>
            <funder>Defi MASTODONS SePhHaDe from CNRS (http://www.lirmm.fr/mastodons)</funder>
          </titleStmt>
          <editionStmt>
            <edition n="v1" type="current">
              <date type="whenSubmitted">2014-02-25 15:01:48</date>
              <date type="whenWritten">2013-11-20</date>
              <date type="whenModified">2025-03-21 15:12:21</date>
              <date type="whenReleased">2014-02-25 16:51:57</date>
              <date type="whenProduced">2014-02-20</date>
              <date type="whenEndEmbargoed">2014-02-25</date>
              <ref type="file" target="https://hal-lirmm.ccsd.cnrs.fr/lirmm-00950983v1/document">
                <date notBefore="2014-02-25"/>
              </ref>
              <ref type="file" subtype="author" n="1" target="https://hal-lirmm.ccsd.cnrs.fr/lirmm-00950983v1/file/STSA-DBG-RR.pdf" id="file-950983-1112866">
                <date notBefore="2014-02-25"/>
              </ref>
            </edition>
            <edition n="v2">
              <date type="whenSubmitted">2014-03-05 19:31:18</date>
            </edition>
            <respStmt>
              <resp>contributor</resp>
              <name key="113912">
                <persName>
                  <forename>Eric</forename>
                  <surname>Rivals</surname>
                </persName>
                <email type="md5">18b1a627af6df8b912fd128ee2d82dee</email>
                <email type="domain">lirmm.fr</email>
              </name>
            </respStmt>
          </editionStmt>
          <publicationStmt>
            <distributor>CCSD</distributor>
            <idno type="halId">lirmm-00950983</idno>
            <idno type="halUri">https://hal-lirmm.ccsd.cnrs.fr/lirmm-00950983</idno>
            <idno type="halBibtex">cazaux:lirmm-00950983</idno>
            <idno type="halRefHtml">RR-14004, 2014, pp.14</idno>
            <idno type="halRef">RR-14004, 2014, pp.14</idno>
            <availability status="restricted">
              <licence target="https://about.hal.science/hal-authorisation-v1/">HAL Authorization<ref corresp="#file-950983-1112866"/></licence>
            </availability>
          </publicationStmt>
          <seriesStmt/>
          <notesStmt>
            <note type="audience" n="1">Not set</note>
          </notesStmt>
          <sourceDesc>
            <biblStruct>
              <analytic>
                <title xml:lang="en">From Indexing Data Structures to de Bruijn Graphs</title>
                <author role="aut">
                  <persName>
                    <forename type="first">Bastien</forename>
                    <surname>Cazaux</surname>
                  </persName>
                  <email type="md5">9801dd5f380a8ef2148c88eb68f25fc8</email>
                  <email type="domain">lirmm.fr</email>
                  <idno type="idhal" notation="string">cazaux-bastien</idno>
                  <idno type="idhal" notation="numeric">3129</idno>
                  <idno type="halauthorid" notation="string">23494-3129</idno>
                  <idno type="IDREF">https://www.idref.fr/226877523</idno>
                  <affiliation ref="#struct-388224"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Thierry</forename>
                    <surname>Lecroq</surname>
                  </persName>
                  <email type="md5">73a06c28c0c1674df8a2929677efdf11</email>
                  <email type="domain">univ-rouen.fr</email>
                  <idno type="idhal" notation="string">thierry-lecroq</idno>
                  <idno type="idhal" notation="numeric">21817</idno>
                  <idno type="halauthorid" notation="string">10003-21817</idno>
                  <idno type="ORCID">https://orcid.org/0000-0002-1900-3397</idno>
                  <idno type="IDREF">https://www.idref.fr/059058463</idno>
                  <affiliation ref="#struct-23832"/>
                </author>
                <author role="crp">
                  <persName>
                    <forename type="first">Eric</forename>
                    <surname>Rivals</surname>
                  </persName>
                  <email type="md5">2948fa0e637f8fdead28884bcc311d7f</email>
                  <email type="domain">lirmm.fr</email>
                  <idno type="idhal" notation="string">eric-rivals</idno>
                  <idno type="idhal" notation="numeric">2002</idno>
                  <idno type="halauthorid" notation="string">23559-2002</idno>
                  <idno type="ORCID">https://orcid.org/0000-0003-3791-3973</idno>
                  <idno type="IDREF">https://www.idref.fr/118021850</idno>
                  <affiliation ref="#struct-213159"/>
                  <affiliation ref="#struct-388224"/>
                </author>
              </analytic>
              <monogr>
                <idno type="reportNumber">RR-14004</idno>
                <meeting>
                  <settlement>Montpellier (France)</settlement>
                </meeting>
                <imprint>
                  <biblScope unit="pp">14</biblScope>
                  <date type="datePub">2014-02-20</date>
                </imprint>
              </monogr>
            </biblStruct>
          </sourceDesc>
          <profileDesc>
            <langUsage>
              <language ident="en">English</language>
            </langUsage>
            <textClass>
              <keywords scheme="author">
                <term xml:lang="en">suffix tree</term>
                <term xml:lang="en">suffix array</term>
                <term xml:lang="en">assembly</term>
                <term xml:lang="en">index data structures</term>
                <term xml:lang="en">dynamic update</term>
                <term xml:lang="en">k-mer</term>
                <term xml:lang="en">overlap</term>
                <term xml:lang="en">contracted de Bruijn graph</term>
                <term xml:lang="en">construction algorithms</term>
              </keywords>
              <classCode scheme="halDomain" n="info.info-bi">Computer Science [cs]/Bioinformatics [q-bio.QM]</classCode>
              <classCode scheme="halDomain" n="sdv.bibs">Life Sciences [q-bio]/Quantitative Methods [q-bio.QM]</classCode>
              <classCode scheme="halDomain" n="info.info-ds">Computer Science [cs]/Data Structures and Algorithms [cs.DS]</classCode>
              <classCode scheme="halTypology" n="REPORT">Reports</classCode>
              <classCode scheme="halOldTypology" n="REPORT">Reports</classCode>
              <classCode scheme="halTreeTypology" n="REPORT">Reports</classCode>
            </textClass>
            <abstract xml:lang="en">
              <p>New technologies have tremendously increased sequencing throughput compared to traditional techniques, thereby complicating DNA assembly. Hence, assembly programs resort to de Bruijn graphs (dBG) of $k$-mers of short reads to compute a set of long contigs, each being a putative segment of the sequenced molecule. Other types of DNA sequence analysis, as well as preprocessing of the reads for assembly, use classical data structures to index all substrings of the reads. It is thus interesting to exhibit algorithms that directly build a de Bruijn graph of order $k$ from a pre-existing index, and especially a contracted version of the de Bruijn graph, where non branching paths are condensed into single nodes. Here, we formalise the relationship between suffix trees/arrays and dBGs, and exhibit linear time algorithms for constructing the full or contracted de Bruijn graphs. Finally, we provide hints explaining why this bridge between indexes and dBGs enables to dynamically update the order $k$ of the graph.</p>
            </abstract>
          </profileDesc>
        </biblFull>
      </listBibl>
    </body>
    <back>
      <listOrg type="structures">
        <org type="researchteam" xml:id="struct-388224" status="OLD">
          <orgName>Méthodes et Algorithmes pour la Bioinformatique</orgName>
          <orgName type="acronym">MAB</orgName>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>LIRMM, 161 rue Ada, 34000 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.lirmm.fr/equipes/MAB/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-181" type="direct"/>
            <relation name="UMR5506" active="#struct-410122" type="indirect"/>
            <relation name="UMR5506" active="#struct-441569" type="indirect"/>
          </listRelation>
        </org>
        <org type="laboratory" xml:id="struct-23832" status="OLD">
          <idno type="IdRef">110731867</idno>
          <idno type="RNSR">200615352R</idno>
          <orgName>Laboratoire d'Informatique, de Traitement de l'Information et des Systèmes</orgName>
          <orgName type="acronym">LITIS</orgName>
          <date type="start">2006-01-01</date>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>Avenue de l'Université 76800 Saint-Étienne-du-Rouvray</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.litislab.eu</ref>
          </desc>
          <listRelation>
            <relation active="#struct-300317" type="direct"/>
            <relation active="#struct-455934" type="indirect"/>
            <relation name="EA4108" active="#struct-300318" type="direct"/>
            <relation active="#struct-301288" type="direct"/>
            <relation active="#struct-301232" type="indirect"/>
          </listRelation>
        </org>
        <org type="laboratory" xml:id="struct-213159" status="OLD">
          <orgName>Institut de Biologie Computationnelle</orgName>
          <orgName type="acronym">IBC</orgName>
          <date type="end">2019-12-31</date>
          <desc>
            <address>
              <addrLine>95 rue de la Galéra, 34095 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.ibc-montpellier.fr/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-92114" type="direct"/>
            <relation active="#struct-300009" type="direct"/>
            <relation active="#struct-410122" type="direct"/>
            <relation active="#struct-441569" type="direct"/>
          </listRelation>
        </org>
        <org type="laboratory" xml:id="struct-181" status="OLD">
          <idno type="IdRef">139590827</idno>
          <idno type="ISNI">0000000405990488</idno>
          <idno type="RNSR">199111950H</idno>
          <idno type="ROR">https://ror.org/013yean28</idno>
          <orgName>Laboratoire d'Informatique de Robotique et de Microélectronique de Montpellier</orgName>
          <orgName type="acronym">LIRMM</orgName>
          <date type="start">1995-01-01</date>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>161 rue Ada - 34095 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.lirmm.fr</ref>
          </desc>
          <listRelation>
            <relation name="UMR5506" active="#struct-410122" type="direct"/>
            <relation name="UMR5506" active="#struct-441569" type="direct"/>
          </listRelation>
        </org>
        <org type="institution" xml:id="struct-410122" status="OLD">
          <idno type="ISNI">0000000120970141</idno>
          <idno type="ROR">https://ror.org/051escj72</idno>
          <orgName>Université de Montpellier</orgName>
          <orgName type="acronym">UM</orgName>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>163 rue Auguste Broussonnet - 34090 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.umontpellier.fr/</ref>
          </desc>
        </org>
        <org type="regroupinstitution" xml:id="struct-441569" status="VALID">
          <idno type="IdRef">02636817X</idno>
          <idno type="ISNI">0000000122597504</idno>
          <idno type="ROR">https://ror.org/02feahw73</idno>
          <orgName>Centre National de la Recherche Scientifique</orgName>
          <orgName type="acronym">CNRS</orgName>
          <date type="start">1939-10-19</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.cnrs.fr/</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-300317" status="VALID">
          <idno type="IdRef">031308570</idno>
          <idno type="ISNI">0000000121731046</idno>
          <idno type="ROR">https://ror.org/05v509s40</idno>
          <orgName>Université Le Havre Normandie</orgName>
          <orgName type="acronym">ULH</orgName>
          <date type="start">1984-08-27</date>
          <desc>
            <address>
              <addrLine>25 rue Philippe Lebon - BP 1123 - 76063 Le Havre Cedex France</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.univ-lehavre.fr/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-455934" type="direct"/>
          </listRelation>
        </org>
        <org type="regroupinstitution" xml:id="struct-455934" status="VALID">
          <idno type="IdRef">190906332</idno>
          <idno type="ISNI">0000000417859671 </idno>
          <idno type="ROR">https://ror.org/01k40cz91</idno>
          <orgName>Normandie Université</orgName>
          <orgName type="acronym">NU</orgName>
          <date type="start">2015-01-01</date>
          <desc>
            <address>
              <addrLine>Esplanade de la Paix - CS 14032 - 14032 Caen Cedex 5</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.normandie-univ.fr/</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-300318" status="VALID">
          <idno type="IdRef">026403919</idno>
          <idno type="ISNI">0000000121083034</idno>
          <idno type="ROR">https://ror.org/03nhjew95</idno>
          <orgName>Université de Rouen Normandie</orgName>
          <orgName type="acronym">UNIROUEN</orgName>
          <date type="start">1966-01-01</date>
          <desc>
            <address>
              <addrLine>1, rue Thomas Becket 76821 Mont-Saint-Aignan Cedex</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.univ-rouen.fr/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-455934" type="direct"/>
          </listRelation>
        </org>
        <org type="institution" xml:id="struct-301288" status="VALID">
          <idno type="IdRef">033364346</idno>
          <idno type="ISNI">0000000121908462</idno>
          <idno type="ROR">https://ror.org/020ws7586</idno>
          <orgName>Institut national des sciences appliquées Rouen Normandie</orgName>
          <orgName type="acronym">INSA Rouen Normandie</orgName>
          <date type="start">1985-01-01</date>
          <desc>
            <address>
              <addrLine>Avenue de l’Université 76801 Saint-Étienne-du-Rouvray Cedex</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.insa-rouen.fr</ref>
          </desc>
          <listRelation>
            <relation active="#struct-301232" type="direct"/>
            <relation active="#struct-455934" type="direct"/>
          </listRelation>
        </org>
        <org type="regroupinstitution" xml:id="struct-301232" status="VALID">
          <idno type="IdRef">162105150</idno>
          <orgName>Institut National des Sciences Appliquées</orgName>
          <orgName type="acronym">INSA</orgName>
          <desc>
            <address>
              <country key="FR"/>
            </address>
          </desc>
        </org>
        <org type="institution" xml:id="struct-92114" status="OLD">
          <idno type="ROR">https://ror.org/01x3gbx83</idno>
          <orgName>Institut National de la Recherche Agronomique</orgName>
          <orgName type="acronym">INRA</orgName>
          <date type="start">1946-05-18</date>
          <date type="end">2019-12-31</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.inra.fr</ref>
          </desc>
        </org>
        <org type="institution" xml:id="struct-300009" status="VALID">
          <idno type="ROR">https://ror.org/02kvxyf05</idno>
          <orgName>Institut National de Recherche en Informatique et en Automatique</orgName>
          <orgName type="acronym">Inria</orgName>
          <desc>
            <address>
              <addrLine>Domaine de VoluceauRocquencourt - BP 10578153 Le Chesnay Cedex</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.inria.fr/en/</ref>
          </desc>
        </org>
      </listOrg>
      <listOrg type="projects">
        <org type="anrProject" xml:id="projanr-23506" status="VALID">
          <idno type="anr">ANR-12-BS02-0008</idno>
          <idno type="program">BLANC</idno>
          <orgName>Colib'read</orgName>
          <desc>Méthodes d'extraction d'information biologique dans les données HTS non assemblées</desc>
          <date type="start">2012</date>
        </org>
      </listOrg>
    </back>
  </text>
</TEI>