<?xml version="1.0" encoding="utf-8"?>
<TEI xmlns="http://www.tei-c.org/ns/1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:hal="http://hal.archives-ouvertes.fr/" xmlns:gml="http://www.opengis.net/gml/3.3/" xmlns:gmlce="http://www.opengis.net/gml/3.3/ce" version="1.1" xsi:schemaLocation="http://www.tei-c.org/ns/1.0 http://api.archives-ouvertes.fr/documents/aofr-sword.xsd">
  <teiHeader>
    <fileDesc>
      <titleStmt>
        <title>HAL TEI export of lirmm-02045144</title>
      </titleStmt>
      <publicationStmt>
        <distributor>CCSD</distributor>
        <availability status="restricted">
          <licence target="https://creativecommons.org/publicdomain/zero/1.0/">CC0 1.0 - Universal</licence>
        </availability>
        <date when="2026-05-03T01:38:16+02:00"/>
      </publicationStmt>
      <sourceDesc>
        <p part="N">HAL API Platform</p>
      </sourceDesc>
    </fileDesc>
  </teiHeader>
  <text>
    <body>
      <listBibl>
        <biblFull>
          <titleStmt>
            <title xml:lang="en">Parallel Computation of PDFs on Big Spatial Data Using Spark</title>
            <author role="aut">
              <persName>
                <forename type="first">Ji</forename>
                <surname>Liu</surname>
              </persName>
              <email type="md5">5d0ed92f5d94bad6f1d4ad63b663c80a</email>
              <email type="domain">inria.fr</email>
              <idno type="idhal" notation="numeric">958433</idno>
              <idno type="halauthorid" notation="string">787622-958433</idno>
              <idno type="ORCID">https://orcid.org/0000-0003-4710-5697</idno>
              <affiliation ref="#struct-141072"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Noel</forename>
                <surname>Moreno Lemus</surname>
              </persName>
              <idno type="halauthorid">1530149-0</idno>
              <affiliation ref="#struct-4626"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Esther</forename>
                <surname>Pacitti</surname>
              </persName>
              <email type="md5">acecebe7f7a9d082b33e8bd2d2d07e73</email>
              <email type="domain">lirmm.fr</email>
              <idno type="idhal" notation="string">esther-pacitti</idno>
              <idno type="idhal" notation="numeric">3271</idno>
              <idno type="halauthorid" notation="string">2253-3271</idno>
              <idno type="ORCID">https://orcid.org/0000-0003-1370-9943</idno>
              <idno type="IDREF">https://www.idref.fr/117946451</idno>
              <affiliation ref="#struct-141072"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Fabio</forename>
                <surname>Porto</surname>
              </persName>
              <email type="md5">d48ac90feaf7aadd7911e48f9e1f0abc</email>
              <email type="domain">lncc.br</email>
              <idno type="idhal" notation="numeric">932292</idno>
              <idno type="halauthorid" notation="string">433850-932292</idno>
              <idno type="ORCID">https://orcid.org/0000-0002-4597-4832</idno>
              <affiliation ref="#struct-4626"/>
            </author>
            <author role="aut">
              <persName>
                <forename type="first">Patrick</forename>
                <surname>Valduriez</surname>
              </persName>
              <email type="md5">b7903099e0d3ee0b492cd1c7a982e35b</email>
              <email type="domain">inria.fr</email>
              <idno type="idhal" notation="string">patrick-valduriez</idno>
              <idno type="idhal" notation="numeric">172604</idno>
              <idno type="halauthorid" notation="string">22529-172604</idno>
              <idno type="ORCID">https://orcid.org/0000-0001-6506-7538</idno>
              <idno type="GOOGLE SCHOLAR">https://scholar.google.fr/citations?user=Vj0m2A0AAAAJ&amp;hl=fr</idno>
              <idno type="IDREF">https://www.idref.fr/028314417</idno>
              <orgName ref="#struct-300009"/>
              <affiliation ref="#struct-141072"/>
            </author>
            <editor role="depositor">
              <persName>
                <forename>Patrick</forename>
                <surname>Valduriez</surname>
              </persName>
              <email type="md5">b7903099e0d3ee0b492cd1c7a982e35b</email>
              <email type="domain">inria.fr</email>
            </editor>
          </titleStmt>
          <editionStmt>
            <edition n="v1" type="current">
              <date type="whenSubmitted">2019-02-21 18:23:53</date>
              <date type="whenModified">2025-08-26 15:21:01</date>
              <date type="whenReleased">2019-02-22 08:49:09</date>
              <date type="whenProduced">2020</date>
              <date type="whenEndEmbargoed">2019-02-21</date>
              <ref type="file" target="https://hal-lirmm.ccsd.cnrs.fr/lirmm-02045144v1/document">
                <date notBefore="2019-02-21"/>
              </ref>
              <ref type="file" subtype="author" n="1" target="https://hal-lirmm.ccsd.cnrs.fr/lirmm-02045144v1/file/DAPDauthorVersion.pdf" id="file-2045144-2052058">
                <date notBefore="2019-02-21"/>
              </ref>
              <ref type="externalLink" target="http://arxiv.org/pdf/1805.03141"/>
            </edition>
            <respStmt>
              <resp>contributor</resp>
              <name key="150418">
                <persName>
                  <forename>Patrick</forename>
                  <surname>Valduriez</surname>
                </persName>
                <email type="md5">b7903099e0d3ee0b492cd1c7a982e35b</email>
                <email type="domain">inria.fr</email>
              </name>
            </respStmt>
          </editionStmt>
          <publicationStmt>
            <distributor>CCSD</distributor>
            <idno type="halId">lirmm-02045144</idno>
            <idno type="halUri">https://hal-lirmm.ccsd.cnrs.fr/lirmm-02045144</idno>
            <idno type="halBibtex">liu:lirmm-02045144</idno>
            <idno type="halRefHtml">&lt;i&gt;Distributed and Parallel Databases&lt;/i&gt;, 2020, 38, pp.63-100. &lt;a target="_blank" href="https://dx.doi.org/10.1007/s10619-019-07260-3"&gt;&amp;#x27E8;10.1007/s10619-019-07260-3&amp;#x27E9;&lt;/a&gt;</idno>
            <idno type="halRef">Distributed and Parallel Databases, 2020, 38, pp.63-100. &amp;#x27E8;10.1007/s10619-019-07260-3&amp;#x27E9;</idno>
            <availability status="restricted">
              <licence target="https://about.hal.science/hal-authorisation-v1/">HAL Authorization<ref corresp="#file-2045144-2052058"/></licence>
            </availability>
          </publicationStmt>
          <seriesStmt>
            <idno type="stamp" n="CNRS">CNRS - Centre national de la recherche scientifique</idno>
            <idno type="stamp" n="INRIA">INRIA - Institut National de Recherche en Informatique et en Automatique</idno>
            <idno type="stamp" n="INRIA-SOPHIA">INRIA Sophia Antipolis - Méditerranée</idno>
            <idno type="stamp" n="INRIASO">INRIA-SOPHIA</idno>
            <idno type="stamp" n="INRIA_TEST">INRIA - Institut National de Recherche en Informatique et en Automatique</idno>
            <idno type="stamp" n="INRIA34">Antenne Inria de l'université de Montpellier</idno>
            <idno type="stamp" n="GRID5000">Grid'5000</idno>
            <idno type="stamp" n="TESTALAIN1">TESTALAIN1</idno>
            <idno type="stamp" n="ZENITH" corresp="LIRMM">Scientific Data Management</idno>
            <idno type="stamp" n="LIRMM">Laboratoire d'Informatique de Robotique et de Microélectronique de Montpellier</idno>
            <idno type="stamp" n="INRIA2">INRIA 2</idno>
            <idno type="stamp" n="MIPS">Mathématiques, Informatique, Physique et Systèmes</idno>
            <idno type="stamp" n="UNIV-MONTPELLIER">Université de Montpellier</idno>
            <idno type="stamp" n="UNIV-COTEDAZUR">Université Côte d'Azur</idno>
            <idno type="stamp" n="INRIA-300009">Inria 300009</idno>
            <idno type="stamp" n="UM-2015-2021" corresp="UNIV-MONTPELLIER">Université de Montpellier (2015-2021)</idno>
            <idno type="stamp" n="INRIAARTDOI">INRIAARTDOI</idno>
            <idno type="stamp" n="INRIA-BRASIL">Inria-Brasil</idno>
            <idno type="stamp" n="SLICES-FR">Publications from users of the SILECS research infrastructure</idno>
          </seriesStmt>
          <notesStmt>
            <note type="audience" n="2">International</note>
            <note type="popular" n="0">No</note>
            <note type="peer" n="1">Yes</note>
          </notesStmt>
          <sourceDesc>
            <biblStruct>
              <analytic>
                <title xml:lang="en">Parallel Computation of PDFs on Big Spatial Data Using Spark</title>
                <author role="aut">
                  <persName>
                    <forename type="first">Ji</forename>
                    <surname>Liu</surname>
                  </persName>
                  <email type="md5">5d0ed92f5d94bad6f1d4ad63b663c80a</email>
                  <email type="domain">inria.fr</email>
                  <idno type="idhal" notation="numeric">958433</idno>
                  <idno type="halauthorid" notation="string">787622-958433</idno>
                  <idno type="ORCID">https://orcid.org/0000-0003-4710-5697</idno>
                  <affiliation ref="#struct-141072"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Noel</forename>
                    <surname>Moreno Lemus</surname>
                  </persName>
                  <idno type="halauthorid">1530149-0</idno>
                  <affiliation ref="#struct-4626"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Esther</forename>
                    <surname>Pacitti</surname>
                  </persName>
                  <email type="md5">acecebe7f7a9d082b33e8bd2d2d07e73</email>
                  <email type="domain">lirmm.fr</email>
                  <idno type="idhal" notation="string">esther-pacitti</idno>
                  <idno type="idhal" notation="numeric">3271</idno>
                  <idno type="halauthorid" notation="string">2253-3271</idno>
                  <idno type="ORCID">https://orcid.org/0000-0003-1370-9943</idno>
                  <idno type="IDREF">https://www.idref.fr/117946451</idno>
                  <affiliation ref="#struct-141072"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Fabio</forename>
                    <surname>Porto</surname>
                  </persName>
                  <email type="md5">d48ac90feaf7aadd7911e48f9e1f0abc</email>
                  <email type="domain">lncc.br</email>
                  <idno type="idhal" notation="numeric">932292</idno>
                  <idno type="halauthorid" notation="string">433850-932292</idno>
                  <idno type="ORCID">https://orcid.org/0000-0002-4597-4832</idno>
                  <affiliation ref="#struct-4626"/>
                </author>
                <author role="aut">
                  <persName>
                    <forename type="first">Patrick</forename>
                    <surname>Valduriez</surname>
                  </persName>
                  <email type="md5">b7903099e0d3ee0b492cd1c7a982e35b</email>
                  <email type="domain">inria.fr</email>
                  <idno type="idhal" notation="string">patrick-valduriez</idno>
                  <idno type="idhal" notation="numeric">172604</idno>
                  <idno type="halauthorid" notation="string">22529-172604</idno>
                  <idno type="ORCID">https://orcid.org/0000-0001-6506-7538</idno>
                  <idno type="GOOGLE SCHOLAR">https://scholar.google.fr/citations?user=Vj0m2A0AAAAJ&amp;hl=fr</idno>
                  <idno type="IDREF">https://www.idref.fr/028314417</idno>
                  <orgName ref="#struct-300009"/>
                  <affiliation ref="#struct-141072"/>
                </author>
              </analytic>
              <monogr>
                <idno type="halJournalId" status="VALID">12634</idno>
                <idno type="issn">0926-8782</idno>
                <idno type="eissn">1573-7578</idno>
                <title level="j">Distributed and Parallel Databases</title>
                <imprint>
                  <publisher>Springer</publisher>
                  <biblScope unit="volume">38</biblScope>
                  <biblScope unit="pp">63-100</biblScope>
                  <date type="datePub">2020</date>
                </imprint>
              </monogr>
              <idno type="doi">10.1007/s10619-019-07260-3</idno>
            </biblStruct>
          </sourceDesc>
          <profileDesc>
            <langUsage>
              <language ident="en">English</language>
            </langUsage>
            <textClass>
              <keywords scheme="author">
                <term xml:lang="en">Parallel processing</term>
                <term xml:lang="en">Spark</term>
                <term xml:lang="en">Big data</term>
                <term xml:lang="en">Spatial data</term>
              </keywords>
              <classCode scheme="https://dl.acm.org/ccs" n="ACM2012.H.0.2.4"/>
              <classCode scheme="halDomain" n="info.info-db">Computer Science [cs]/Databases [cs.DB]</classCode>
              <classCode scheme="halDomain" n="info.info-dc">Computer Science [cs]/Distributed, Parallel, and Cluster Computing [cs.DC]</classCode>
              <classCode scheme="halTypology" n="ART">Journal articles</classCode>
              <classCode scheme="halOldTypology" n="ART">Journal articles</classCode>
              <classCode scheme="halTreeTypology" n="ART">Journal articles</classCode>
            </textClass>
            <abstract xml:lang="en">
              <p>We consider big spatial data, which is typically produced in scientific areas such as geological or seismic interpretation. The spatial data can be produced by observation (e.g. using sensors or soil instruments) or numerical simulation programs and correspond to points that represent a 3D soil cube area. However, errors in signal processing and modeling create some uncertainty, and thus a lack of accuracy in identifying geological or seismic phenomena. Such uncertainty must be carefully analyzed. To analyze uncertainty, the main solution is to compute a Probability Density Function (PDF) of each point in the spatial cube area. However, computing PDFs on big spatial data can be very time-consuming (from several hours to even months on a computer cluster). In this paper, we propose a new solution to efficiently compute such PDFs in parallel using Spark, with three methods: data grouping, machine learning prediction and sampling. We evaluate our solution by extensive experiments on different computer clusters using big data ranging from hundreds of GB to several TB. The experimental results show that our solution scales up very well and can reduce the execution time by a factor of 33 (in the order of seconds or minutes) compared with a baseline method.</p>
            </abstract>
          </profileDesc>
        </biblFull>
      </listBibl>
    </body>
    <back>
      <listOrg type="structures">
        <org type="researchteam" xml:id="struct-141072" status="OLD">
          <idno type="RNSR">201121208J</idno>
          <orgName>Scientific Data Management</orgName>
          <orgName type="acronym">ZENITH</orgName>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>LIRMM, 161 rue Ada, 34000 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://team.inria.fr/zenith/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-181" type="direct"/>
            <relation name="UMR5506" active="#struct-410122" type="indirect"/>
            <relation name="UMR5506" active="#struct-441569" type="indirect"/>
            <relation active="#struct-34586" type="direct"/>
            <relation active="#struct-300009" type="indirect"/>
          </listRelation>
        </org>
        <org type="institution" xml:id="struct-4626" status="VALID">
          <idno type="ROR">https://ror.org/0498ekt05</idno>
          <orgName>Laboratorio Nacional de Computação Cientifica [Rio de Janeiro]</orgName>
          <orgName type="acronym">LNCC / MCT</orgName>
          <desc>
            <address>
              <addrLine>LNCC, Av. Getulio Vargas, 333, Quitandinha, 25651-075, Petropolis, RJ</addrLine>
              <country key="BR"/>
            </address>
            <ref type="url">http://www.lncc.br</ref>
          </desc>
        </org>
        <org type="laboratory" xml:id="struct-181" status="OLD">
          <idno type="IdRef">139590827</idno>
          <idno type="ISNI">0000000405990488</idno>
          <idno type="RNSR">199111950H</idno>
          <idno type="ROR">https://ror.org/013yean28</idno>
          <orgName>Laboratoire d'Informatique de Robotique et de Microélectronique de Montpellier</orgName>
          <orgName type="acronym">LIRMM</orgName>
          <date type="start">1995-01-01</date>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>161 rue Ada - 34095 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.lirmm.fr</ref>
          </desc>
          <listRelation>
            <relation name="UMR5506" active="#struct-410122" type="direct"/>
            <relation name="UMR5506" active="#struct-441569" type="direct"/>
          </listRelation>
        </org>
        <org type="institution" xml:id="struct-410122" status="OLD">
          <idno type="ISNI">0000000120970141</idno>
          <idno type="ROR">https://ror.org/051escj72</idno>
          <orgName>Université de Montpellier</orgName>
          <orgName type="acronym">UM</orgName>
          <date type="end">2021-12-31</date>
          <desc>
            <address>
              <addrLine>163 rue Auguste Broussonnet - 34090 Montpellier</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.umontpellier.fr/</ref>
          </desc>
        </org>
        <org type="regroupinstitution" xml:id="struct-441569" status="VALID">
          <idno type="IdRef">02636817X</idno>
          <idno type="ISNI">0000000122597504</idno>
          <idno type="ROR">https://ror.org/02feahw73</idno>
          <orgName>Centre National de la Recherche Scientifique</orgName>
          <orgName type="acronym">CNRS</orgName>
          <date type="start">1939-10-19</date>
          <desc>
            <address>
              <country key="FR"/>
            </address>
            <ref type="url">https://www.cnrs.fr/</ref>
          </desc>
        </org>
        <org type="laboratory" xml:id="struct-34586" status="VALID">
          <idno type="RNSR">198318250R</idno>
          <idno type="ROR">https://ror.org/01nzkaw91</idno>
          <orgName>Centre Inria d'Université Côte d'Azur</orgName>
          <desc>
            <address>
              <addrLine>2004 route des Lucioles BP 93 06902 Sophia Antipolis</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.inria.fr/centre/sophia/</ref>
          </desc>
          <listRelation>
            <relation active="#struct-300009" type="direct"/>
          </listRelation>
        </org>
        <org type="institution" xml:id="struct-300009" status="VALID">
          <idno type="ROR">https://ror.org/02kvxyf05</idno>
          <orgName>Institut National de Recherche en Informatique et en Automatique</orgName>
          <orgName type="acronym">Inria</orgName>
          <desc>
            <address>
              <addrLine>Domaine de Voluceau, Rocquencourt - BP 105, 78153 Le Chesnay Cedex</addrLine>
              <country key="FR"/>
            </address>
            <ref type="url">http://www.inria.fr/en/</ref>
          </desc>
        </org>
      </listOrg>
    </back>
  </text>
</TEI>