Zen 98052
2016-06-09 15:28:08 UTC
Hi,
I have a SPARQL query (below) that doesn't seem efficient.
I noticed that when running it, Jena calls execute(OpBGP opBGP, QueryIterator ...) many times.
I have my own implementation of that function (overriding the base class OpExecutor), which makes calls to our back-end storage.
From the qparse output (attached below), it looks like the culprit is that the query has BGPs inside the FILTER, which would explain the behavior I am seeing.
Is there a better way to rewrite the query below so that it achieves the same result more efficiently (and with better performance)?
Thanks,
Z
/// SPARQL QUERY:
# Counts the solutions in which ?x is bound: v:Person resources picked by
# snapshot id, joined with every triple of ?x, plus an optional look at each object.
# NOTE(review): the owl:, raw: and rdfs: prefixes are declared but never used below.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX raw: <http://v/raw#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX v: <http://b/dir/>
SELECT (COUNT(?x) AS ?count) WHERE
{
?x rdf:type v:Person .
# Sub-select: ?x values that have a v:DS snapshot whose v:mdId is one of the listed ids.
{
SELECT ?x WHERE
{
?x v:hasSnapshot ?snapshot .
?snapshot rdf:type v:DS .
?snapshot v:mdId ?id .
VALUES ?id { 'b01.xml' 'f5f.xml' }
# NOTE(review): both sides of this MINUS bind ?x, ?snapshot and ?id, and the two
# VALUES lists have no literal in common, so a left-hand solution is never
# compatible with a right-hand one and MINUS removes nothing. If the intent is
# "drop persons that also have one of these snapshots", the inner pattern
# presumably needs its own variable names for ?snapshot and ?id -- TODO confirm.
MINUS
{
?x v:hasSnapshot ?snapshot .
?snapshot rdf:type v:DS .
?snapshot v:mdId ?id .
VALUES ?id { 'def.xml' '191.xml' }
}
}
}
# Every triple with ?x as subject; each match is a separate solution seen by COUNT(?x).
?x ?p ?o .
OPTIONAL
{
# NOTE(review): ?x is already bound as a subject above and is used here in
# predicate position -- confirm this is intended and not a typo for a fresh variable.
?o ?x ?y .
?o rdf:type ?type.
# Keeps the optional match only when ?o is typed neither v:Dynamic nor v:Static.
# The UNION appears as two separate BGPs under notexists in the qparse output.
FILTER NOT EXISTS
{
{ ?o rdf:type v:Dynamic }
UNION
{ ?o rdf:type v:Static }
}
}
}
/// OUTPUT FROM running "qparse --explain --print=op -v":
(prefix ((raw: <http://v/raw#>)
(rdfs: <http://www.w3.org/2000/01/rdf-schema#>)
(rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>)
(owl: <http://www.w3.org/2002/07/owl#>)
(v: <http://b/dir/>))
(project (?count)
(extend ((?count ?.0))
(group () ((?.0 (count ?x)))
(conditional
(sequence
(join
(bgp (triple ?x rdf:type v:Person))
(project (?x)
(minus
(sequence
(table (vars ?/id)
(row [?/id "b01.xml"])
(row [?/id "f5f.xml"])
)
(bgp
(triple ?x v:hasSnapshot ?/snapshot)
(triple ?/snapshot rdf:type v:DS)
(triple ?/snapshot v:mdId ?/id)
))
(sequence
(table (vars ?/id)
(row [?/id "def.xml"])
(row [?/id "191.xml"])
)
(bgp
(triple ?x v:hasSnapshot ?/snapshot)
(triple ?/snapshot rdf:type v:DS)
(triple ?/snapshot v:mdId ?/id)
)))))
(bgp (triple ?x ?p ?o)))
(sequence
(filter (notexists
(union
(bgp (triple ?o rdf:type v:Dynamic))
(bgp (triple ?o rdf:type v:Static))))
(bgp (triple ?o ?x ?y)))
(bgp (triple ?o rdf:type ?type))))))))
I have a SPARQL query (below) that doesn't seem efficient.
I noticed that when running it, Jena calls execute(OpBGP opBGP, QueryIterator ...) many times.
I have my own implementation of that function (overriding the base class OpExecutor), which makes calls to our back-end storage.
From the qparse output (attached below), it looks like the culprit is that the query has BGPs inside the FILTER, which would explain the behavior I am seeing.
Is there a better way to rewrite the query below so that it achieves the same result more efficiently (and with better performance)?
Thanks,
Z
/// SPARQL QUERY:
# Counts the solutions in which ?x is bound: v:Person resources picked by
# snapshot id, joined with every triple of ?x, plus an optional look at each object.
# NOTE(review): the owl:, raw: and rdfs: prefixes are declared but never used below.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX raw: <http://v/raw#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX v: <http://b/dir/>
SELECT (COUNT(?x) AS ?count) WHERE
{
?x rdf:type v:Person .
# Sub-select: ?x values that have a v:DS snapshot whose v:mdId is one of the listed ids.
{
SELECT ?x WHERE
{
?x v:hasSnapshot ?snapshot .
?snapshot rdf:type v:DS .
?snapshot v:mdId ?id .
VALUES ?id { 'b01.xml' 'f5f.xml' }
# NOTE(review): both sides of this MINUS bind ?x, ?snapshot and ?id, and the two
# VALUES lists have no literal in common, so a left-hand solution is never
# compatible with a right-hand one and MINUS removes nothing. If the intent is
# "drop persons that also have one of these snapshots", the inner pattern
# presumably needs its own variable names for ?snapshot and ?id -- TODO confirm.
MINUS
{
?x v:hasSnapshot ?snapshot .
?snapshot rdf:type v:DS .
?snapshot v:mdId ?id .
VALUES ?id { 'def.xml' '191.xml' }
}
}
}
# Every triple with ?x as subject; each match is a separate solution seen by COUNT(?x).
?x ?p ?o .
OPTIONAL
{
# NOTE(review): ?x is already bound as a subject above and is used here in
# predicate position -- confirm this is intended and not a typo for a fresh variable.
?o ?x ?y .
?o rdf:type ?type.
# Keeps the optional match only when ?o is typed neither v:Dynamic nor v:Static.
# The UNION appears as two separate BGPs under notexists in the qparse output.
FILTER NOT EXISTS
{
{ ?o rdf:type v:Dynamic }
UNION
{ ?o rdf:type v:Static }
}
}
}
/// OUTPUT FROM running "qparse --explain --print=op -v":
(prefix ((raw: <http://v/raw#>)
(rdfs: <http://www.w3.org/2000/01/rdf-schema#>)
(rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>)
(owl: <http://www.w3.org/2002/07/owl#>)
(v: <http://b/dir/>))
(project (?count)
(extend ((?count ?.0))
(group () ((?.0 (count ?x)))
(conditional
(sequence
(join
(bgp (triple ?x rdf:type v:Person))
(project (?x)
(minus
(sequence
(table (vars ?/id)
(row [?/id "b01.xml"])
(row [?/id "f5f.xml"])
)
(bgp
(triple ?x v:hasSnapshot ?/snapshot)
(triple ?/snapshot rdf:type v:DS)
(triple ?/snapshot v:mdId ?/id)
))
(sequence
(table (vars ?/id)
(row [?/id "def.xml"])
(row [?/id "191.xml"])
)
(bgp
(triple ?x v:hasSnapshot ?/snapshot)
(triple ?/snapshot rdf:type v:DS)
(triple ?/snapshot v:mdId ?/id)
)))))
(bgp (triple ?x ?p ?o)))
(sequence
(filter (notexists
(union
(bgp (triple ?o rdf:type v:Dynamic))
(bgp (triple ?o rdf:type v:Static))))
(bgp (triple ?o ?x ?y)))
(bgp (triple ?o rdf:type ?type))))))))