TechnologyOctober 21, 2016

A Gremlin Implementation of the Gremlin Traversal Machine

Marko A. Rodriguez
Marko A. Rodriguez
A Gremlin Implementation of the Gremlin Traversal Machine
~ python
Python 2.7.10 (default, Oct 23 2015, 19:19:21)
[GCC 4.2.1 Compatible Apple LLVM 7.0.0 (clang-700.0.59.5)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> from gremlin_python import statics
>>> from gremlin_python.structure.graph import Graph
>>> from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
>>> statics.load_statics(globals())
>>>
>>> g = Graph().traversal().withRemote(DriverRemoteConnection('ws://localhost:8182/gremlin','g'))
>>>
>>> g.V().has('name','marko').out('knows').count().next()
2L
>>>
>>> g.V().has('name','marko'). \
...   out('created').in_('created'). \
...   has('name',neq('marko')).dedup().name.toList()
[u'josh', u'peter']
>>>
>>> g.V().has('name','lop').in_('created'). \
...   out('knows').dedup().age.mean().next()
29.5
>>>
>>> g.V().hasLabel('person'). \
...   repeat(both('knows')).times(5). \
...   groupCount().by('name').next()
{u'vadas': 4L, u'marko': 8L, u'josh': 4L}
>>>
$ bin/gremlin.sh
         \,,,/
         (o o)
-----oOOo-(3)-oOOo-----
plugin activated: tinkerpop.server
plugin activated: tinkerpop.utilities
plugin activated: tinkerpop.tinkergraph
gremlin> import static org.apache.tinkerpop.gremlin.util.function.Lambda.function
gremlin> import static org.apache.tinkerpop.gremlin.util.function.Lambda.predicate
gremlin> import static org.apache.tinkerpop.gremlin.util.function.Lambda.consumer
gremlin> g = EmptyGraph.instance().traversal().withRemote(DriverRemoteConnection.using(Cluster.build('localhost').port(8182).create(),'g'))
==>graphtraversalsource[emptygraph[empty], standard]
gremlin>
gremlin> traversal = g.V().has('name','marko').out('knows').values('name'); []
gremlin>
gremlin> g.inject(traversal).
           map(function('it.get().bytecode')).as('bytecode').
           addV('traversal').as('traversal').property('source','g').
           select('bytecode').map(function('it.get().stepInstructions')).unfold().
           project('op','args').as('inst').
             by(function('it.operator')).
             by(function('it.arguments as List')).
           addV('step').as('step').
             property('op', select('op')).
             sideEffect(select('inst').select('args').unfold().as('arg').
                        select('step').property(list,'arg',select(last,'arg'))).
           choose(select('traversal').out('next'),
             addE('next').from(V().where(hasLabel('step').and().not(outE('next'))).where(neq('step'))).to(select('step')),              
             addE('next').from(select('traversal')).to(select('step'))).iterate()
g.V().has('name','marko').out('knows').values('name')
gremlin> g.V().has('source','g').
           repeat(out('next')).until(out('next').count().is(0)).
           path().by(valueMap('source','op','arg'))
==>[[source:[g]],[op:[V]],[op:[has],arg:[name,eq(marko)]],[op:[out],arg:[knows]],[op:[values],arg:[name]]]
gremlin> g.V().hasLabel(within('traversal','step')).drop()
gremlin>
gremlin> traversal = g.V().has('name','marko').
                       repeat(outE('knows','created').identity().inV()).times(2).
                       values('name').groupCount(); []
gremlin>
gremlin> g.inject(traversal).
           map(function('it.get().bytecode')).as('bytecode').
           addV('traversal').
             property('source','g').
             property('bytecode',select('bytecode')).
             property('depth',0L).
           repeat(V().hasLabel('traversal').has('bytecode').
             has('depth',loops()).as('traversal').
             values('bytecode').map(function('it.get().stepInstructions')).unfold().
             project('op','args').as('inst').
               by(function('it.operator')).
               by(function('it.arguments as List')).
             addV('step').as('step').
               property('op', select(last,'inst').select('op')).
               sideEffect(addE('parent').to(select(last,'traversal')).
               select(last,'inst').select('args').unfold().as('arg').
               choose(predicate('it instanceof Bytecode'),
                 addV('traversal').
                   property('source','__').
                   property('bytecode',select(last,'arg')).      
                   property('depth', union(loops(), constant(1)).sum()).
                   addE('child').from(select(last,'step')),
                 select(last,'step').property(list,'arg',select(last,'arg')))).
           choose(select(last,'traversal').not(out('next')),
             addE('next').from(select(last,'traversal')).to(select(last,'step')),
             addE('next').from(select('prev').tail(local,1)).to(select(last,'step'))).
           select(last,'step').store('prev').
           sideEffect(select(last,'traversal').properties('bytecode','depth').drop()))
g.V().has('name','marko').
  repeat(outE('knows','created').identity().inV()).times(2).
  values('name').groupCount()
gremlin> g.V().has('source').as('source').
           order().by(until(__.in('child').count().is(0)).
                      repeat(__.in('child').out('parent')).path().count(local),incr).
           choose(__.in('child'),
                  __.in('child').map(select('source').out('next')),
                  out('next')).
           until(out('next').count().is(0)).repeat(out('next')).
           path().by(valueMap('source','op','arg'))
==>[[source:[g]],[op:[V]],[op:[has],arg:[name,eq(marko)]],[op:[repeat]],[op:[times],arg:[2]],[op:[values],arg:[name]],[op:[groupCount]]]
==>[[source:[__]],[op:[repeat]],[op:[outE],arg:[knows,created]],[op:[identity]],[op:[inV]]]
gremlin> g.V().group().
                 by(label).
                 by(valueMap('name','source','op').fold()).next()
==>software=[{name=[lop]}, {name=[ripple]}]
==>person=[{name=[marko]}, {name=[vadas]}, {name=[josh]}, {name=[peter]}]
==>step=[{op=[groupCount]}, {op=[V]}, {op=[outE]}, {op=[has]}, {op=[identity]}, {op=[repeat]}, {op=[inV]}, {op=[times]}, {op=[values]}]
==>traversal=[{source=[__]}, {source=[g]}]
g.V().has('op','identity').as('step').
  addE('next').
    from(select('step').in('next')).
    to(select('step').out('next')).
  select('step').drop()
gremlin> g.V().has('source').as('source').
           order().by(until(__.in('child').count().is(0)).
                      repeat(__.in('child').out('parent')).path().count(local),incr).
           choose(__.in('child'),
                  __.in('child').map(select('source').out('next')),
                  out('next')).
           until(out('next').count().is(0)).repeat(out('next')).
           path().by(valueMap('source','op','arg'))
==>[[source:[g]],[op:[V]],[op:[has],arg:[name,eq(marko)]],[op:[repeat]],[op:[times],arg:[2]],[op:[values],arg:[name]],[op:[groupCount]]]
==>[[source:[__]],[op:[repeat]],[op:[outE],arg:[knows,created]],[op:[inV]]]
g.V().match(
  __.as('a').out('next').as('b'),
  __.as('a').has('op','outE'),
  __.as('b').has('op','inV')).
addV('step').as('c').
  property('op','out').
  sideEffect(select('a').values('arg').as('arg').
             select('c').property(list,'arg',select('arg'))).
  addE('next').from(select('a').in('next')).to('c').
  choose(select('b').out('next').count().is(gt(0)),
    addE('next').from('c').to(select('b').out('next')),
    identity()).
select('a','b').select(values).unfold().drop()
gremlin> g.V().has('source').as('source').
           order().by(until(__.in('child').count().is(0)).
                      repeat(__.in('child').out('parent')).path().count(local),incr).
           choose(__.in('child'),
                  __.in('child').map(select('source').out('next')),
                  out('next')).
           until(out('next').count().is(0)).repeat(out('next')).
           path().by(valueMap('source','op','arg'))
==>[[source:[g]],[op:[V]],[op:[has],arg:[name,eq(marko)]],[op:[repeat]],[op:[times],arg:[2]],[op:[values],arg:[name]],[op:[groupCount]]]
==>[[source:[__]],[op:[repeat]],[op:[out],arg:[knows,created]]]
gremlin> graph = TinkerGraph.open()
==>tinkergraph[vertices:0 edges:0]
gremlin> graph.io(graphml()).readGraph('data/grateful-dead.xml')
/////////////////////////////////
// Without LazyBarrierStrategy //
/////////////////////////////////
gremlin> g = graph.traversal().withoutStrategies(LazyBarrierStrategy)
==>graphtraversalsource[tinkergraph[vertices:808 edges:8049], standard]
gremlin> clockWithResult(100){
           g.V().has('name','DARK STAR').
             out('followedBy').aggregate('likes').
             in('followedBy').out('followedBy').
               where(not(within('likes'))).
             groupCount().by('name').
             order(local).by(values,decr).
               unfold().limit(10).toList() }
==>19.0969239
==>[CHINA CAT SUNFLOWER=788,SAMSON AND DELILAH=778,UNCLE JOHNS BAND=750,SCARLET BEGONIAS=747,NEW MINGLEWOOD BLUES=726,
    ESTIMATED PROPHET=709,THE MUSIC NEVER STOPPED=699,LOOKS LIKE RAIN=697,BEAT IT ON DOWN THE LINE=661,RAMBLE ON ROSE=656]
//////////////////////////////
// With LazyBarrierStrategy //
//////////////////////////////
gremlin> g = graph.traversal()                                      
==>graphtraversalsource[tinkergraph[vertices:808 edges:8049], standard]
gremlin> clockWithResult(100){
           g.V().has('name','DARK STAR').
             out('followedBy').aggregate('likes').
             in('followedBy').out('followedBy').
               where(not(within('likes'))).
             groupCount().by('name').
             order(local).by(values,decr).
               unfold().limit(10).toList() }
==>1.4836289499999997
==>[CHINA CAT SUNFLOWER=788,SAMSON AND DELILAH=778,UNCLE JOHNS BAND=750,SCARLET BEGONIAS=747,NEW MINGLEWOOD BLUES=726,
    ESTIMATED PROPHET=709,THE MUSIC NEVER STOPPED=699,LOOKS LIKE RAIN=697,BEAT IT ON DOWN THE LINE=661,RAMBLE ON ROSE=656]
///////////////////////////////
// Using Traversal.explain() //
///////////////////////////////
==>Traversal Explanation
===========================================================================================================
Original Traversal                 [GraphStep(vertex,[]), HasStep([name.eq(DARK STAR)]),
                                    VertexStep(OUT[followedBy],vertex), AggregateStep(likes),
                                    VertexStep(IN,[followedBy],vertex), VertexStep(OUT[followedBy],vertex),
                                    WherePredicateStep(without([likes])), GroupCountStep(value(name)),
                                    OrderLocalStep([[values,decr]]), UnfoldStep, RangeGlobalStep(0,10)]
...
Final Traversal                    [TinkerGraphStep(vertex,[name.eq(DARK STAR)]),
                                    VertexStep(OUT[followedBy],vertex), AggregateStep(likes),
                                    VertexStep(IN,[followedBy],vertex), NoOpBarrierStep(2500),
                                    VertexStep(OUT,[followedBy],vertex), NoOpBarrierStep(2500),
                                    WherePredicateStep(without([likes])),
                                    GroupCountStep(value(name)), OrderLocalStep([[values, decr]]),
                                    UnfoldStep, RangeGlobalStep(0,10)]
g.V().as('flatMap').
  has('op',within('out','in','both','values')).
  where(out('next').has('op',neq('barrier')).or().not(out('next'))).
  addV('step').as('barrier').
    property('op','barrier').
    property('arg','2500').
  sideEffect(select('flatMap').outE('next').as('a').
       addE('next').from('barrier').to(select('a').inV()).
       select('a').drop()).
  select('flatMap').addE('next').from('flatMap').to('barrier')
gremlin> g.V().has('source').as('source').
           order().by(until(__.in('child').count().is(0)).
                      repeat(__.in('child').out('parent')).path().count(local),incr).
           choose(__.in('child'),
                  __.in('child').map(select('source').out('next')),
                  out('next')).
           until(out('next').count().is(0)).repeat(out('next')).
           path().by(valueMap('source','op','arg'))
==>[[source:[g]],[op:[V]],[op:[has],arg:[name,eq(marko)]],[op:[repeat]],[op:[times],arg:[2]],[op:[values],arg:[name]],[op:[barrier],arg:[2500]],[op:[groupCount]]]
==>[[source:[__]],[op:[repeat]],[op:[out],arg:[knows,created]],[op:[barrier],arg:[2500]]]
g.V().has('name','marko').
  repeat(outE('knows','created').identity().inV()).times(2).
  values('name').groupCount()
g.V().has('name','marko').
  repeat(out('knows','created').barrier(2500)).times(2).
  values('name').barrier(2500).groupCount()
g.withSack(0).withSideEffect('drain',[1]).withSideEffect('fill',[]).
  V().has('source','g').as('parent').
  repeat(out('next').as('step').
    map(values('arg').fold()).as('args').
    sideEffect(select('drain').unfold().
    choose(select(last,'args').count(local).is(0),
      choose(select(last,'step').by('op')).
        option('V', V().hasLabel(not(within('step','traversal')))).
        option('out',out()).
        option('in',__.in()).
        option('both',both()).
        option('outE',outE()).
        option('inE',inE()).
        option('bothE',bothE()).
        option('inV',inV()).
        option('outV',outV()).
        option('otherV',otherV()).
        option('values',__.values()).
        option('barrier',barrier()).
        option('dedup',dedup()).
        option('repeat',identity()).     // option(none,identity())
        option('fold',identity()).       // option(none,identity())
        option('sum',identity()).        // option(none,identity())
        option('mean',identity()).       // option(none,identity())
        option('min',identity()).        // option(none,identity())
        option('max',identity()).        // option(none,identity())
        option('count',identity()).      // option(none,identity())
        option('groupCount',identity()), // option(none,identity())
      choose(select(last,'step').by('op')).
        option('V', V().hasLabel(not(within('step','traversal'))).where(within('args')).by(id).by()).
        option('has',filter(union(label(),properties().where(within('args')).by(key).by().value()).filter(predicate("it.path(last,'args')[1].test(it.get())")))).
        option('out',outE().where(within('args')).by(label).by().inV()).
        option('in',__.inE().where(within('args')).by(label).by().outV()).
        option('both',bothE().where(within('args')).by(label).by().otherV()).
        option('outE',outE().where(within('args')).by(label).by()).
        option('inE',inE().where(within('args')).by(label).by()).
        option('bothE',bothE().where(within('args')).by(label).by()).
        option('values',properties().where(within('args')).by(key).by().value()).
        option('barrier',barrier()).
        option('times',identity())).     // option(none,identity())
      store('fill')).
    sideEffect(consumer("it.sideEffects('drain').clear()")).
    sideEffect(select('fill').unfold().store('drain')).
    sideEffect(consumer("it.sideEffects('fill').clear()")).
    select(last,'step').
    choose(has('op','repeat').and().out('next').has('op','times'),
      select(last,'step').as('parent').sack(assign).by(out('next').values('arg')).out('child').as('step'),
      choose(select(last,'parent').has('op','repeat').and().out('next').count().is(0),
        choose(sack().is(gt(1)),
          sack(minus).by(constant(1)).select(last,'parent').out('child').as('step'),
          select(last,'parent').as('step').out('parent').as('parent').select(last,'step')),
        identity()))).
  until(out('next').count().is(0)).
  select('drain').
  choose(select(last,'step').has('op',within('fold','sum','mean','min','max','count','groupCount')), // option(none,identity())
    choose(select(last,'step').by('op')).
      option('fold',identity()).
      option('sum',map(unfold().sum())).
      option('mean',map(unfold().mean())).
      option('min',map(unfold().min())).
      option('max',map(unfold().max())).
      option('count',map(unfold().count())).
      option('groupCount',map(unfold().groupCount())),
    unfold()).toList() 
==>[ripple:1,lop:1]
/////////////////////////////////////
// Gremlin-Based Traversal Machine //
/////////////////////////////////////
gremlin> clockWithResult(100){ gremlin(g,
  g.V().has('name','marko').
    repeat(outE().identity().inV()).times(2).
    values('name').
    groupCount()) }
==>18.23885611
==>[[ripple:1,lop:1]]
//////////////////////////////////
// Java-Based Traversal Machine //
//////////////////////////////////
gremlin> clockWithResult(100) {
  g.V().has('name','marko').
    repeat(outE().identity().inV()).times(2).
    values('name').
    groupCount().toList() }
==>0.45385166
==>[[ripple:1,lop:1]]
gremlin> gremlin(g, g.V().has('name','marko').out('knows').count())
==>2
gremlin> gremlin(g,
  g.V().has('name','marko').
    out('created').in('created').
    has('name',neq('marko')).dedup().values('name'))
==>josh
==>peter
gremlin> gremlin(g,
  g.V().has('name','lop').in('created').
    out('knows').values('age').mean())
==>29.5
gremlin> gremlin(g,
  g.V().hasLabel('person').
    repeat(both('knows')).times(5).
    values('name').groupCount())
==>[vadas:4,josh:4,marko:8]
g.addV('message').property('text','Welcome to the machine.').
  addE('bellow').to(V().has('source','g'))
Discover more
Gremlin
Share

One-stop Data API for Production GenAI

Astra DB gives JavaScript developers a complete data API and out-of-the-box integrations that make it easier to build production RAG apps with high relevancy and low latency.