Natural language parsing

To parse a sentence, we run the Stanford parser to obtain a phrase-structure parse tree, match patterns against its subtrees, and use combinators to translate the matches.

In [1]:
import $ivy.`io.github.siddhartha-gadgil::provingground-nlp:0.1.0`
import $ivy.`edu.stanford.nlp:stanford-corenlp:3.7.0`
Out[1]:
import $ivy.$                                                     

import $ivy.$                                        
In [2]:
// Load an additional stanford-corenlp 3.7.0 artifact into the running session,
// selected by the "models" classifier (the `import $ivy` coordinate used earlier
// carries no classifier).
// NOTE(review): presumably this is the jar holding the trained models the parser
// needs at runtime — confirm.
interp.load.ivy(coursier.Dependency(
      coursier.Module("edu.stanford.nlp", "stanford-corenlp"),
      "3.7.0",
      // Same group/name/version as the main library; only the classifier differs.
      attributes = coursier.Attributes(classifier = "models")
    )
  )
In [3]:
import edu.stanford._
Out[3]:
import edu.stanford._
In [4]:
import edu.stanford.nlp._
Out[4]:
import edu.stanford.nlp._
In [5]:
import trees.Tree
import simple._
Out[5]:
import trees.Tree

import simple._
In [6]:
// The example statement to be parsed, wrapped as a `Sentence`.
val sent: Sentence =
  new Sentence("if a prime number P divides MN, P divides one of M and N")
Out[6]:
sent: Sentence = if a prime number P divides MN, P divides one of M and N
In [7]:
// Parse the sentence into a `Tree`; every later cell works on this tree.
val tree: Tree = sent.parse()
Out[7]:
tree: Tree = (ROOT (S (SBAR (IN if) (S (NP (DT a) (JJ prime) (NN number) (NN P)) (VP (VBZ divides) (NP (NNP MN))))) (, ,) (NP (NNP P)) (VP (VBZ divides) (NP (NP (CD one)) (PP (IN of) (NP (NNP M) (CC and) (NNP N)))))))
In [8]:
import provingground._, translation._
Out[8]:
import provingground._, translation._
In [9]:
import TreePatterns._
Out[9]:
import TreePatterns._
In [10]:
import scala.collection.JavaConversions._
Out[10]:
import scala.collection.JavaConversions._
In [11]:
// `tree.subTrees` hands back a Java collection. Convert it explicitly with
// `asScala` instead of relying on the implicit views from the deprecated
// `scala.collection.JavaConversions`, so the Java/Scala boundary is visible.
// The import is kept local to this cell so the cell does not depend on the
// deprecated wildcard import being present.
import scala.collection.JavaConverters._

// All subtrees of the parse tree, as an immutable Scala list.
val st = tree.subTrees.asScala.toList
Out[11]:
st: List[Tree] = List(
  (, ,),
  (NP (NP (CD one)) (PP (IN of) (NP (NNP M) (CC and) (NNP N)))),
  (NNP P),
  (CD one),
  M,
  N,
  (NN P),
  P,
  (NN number),
  number,
  (VP (VBZ divides) (NP (NNP MN))),
  (VP (VBZ divides) (NP (NP (CD one)) (PP (IN of) (NP (NNP M) (CC and) (NNP N))))),
  (NP (DT a) (JJ prime) (NN number) (NN P)),
  (NP (NNP MN)),
  (NP (NNP P)),
  divides,
  and,
  (ROOT (S (SBAR (IN if) (S (NP (DT a) (JJ prime) (NN number) (NN P)) (VP (VBZ divides) (NP (NNP MN))))) (, ,) (NP (NNP P)) (VP (VBZ divides) (NP (NP (CD one)) (PP (IN of) (NP (NNP M) (CC and) (NNP N))))))),
  of,
  (NP (NNP M) (CC and) (NNP N)),
  (NNP M),
  if,
  (NNP N),
  (JJ prime),
  prime,
  a,
  MN,
  (PP (IN of) (NP (NNP M) (CC and) (NNP N))),
  (NP (CD one)),
  (SBAR (IN if) (S (NP (DT a) (JJ prime) (NN number) (NN P)) (VP (VBZ divides) (NP (NNP MN))))),
  (S (NP (DT a) (JJ prime) (NN number) (NN P)) (VP (VBZ divides) (NP (NNP MN)))),
  one,
  ,,
...
In [12]:
// Try the `IfTree` extractor on every subtree and keep only the subtrees on
// which it succeeds, collecting the extracted values.
val matches = st.flatMap(subtree => IfTree.unapply(subtree))
Out[12]:
matches: List[(cats.package.Id[Tree], Vector[Tree])] = List(
  (
    (S (NP (DT a) (JJ prime) (NN number) (NN P)) (VP (VBZ divides) (NP (NNP MN)))),
    Vector(
      (NP (NNP P)),
      (VP (VBZ divides) (NP (NP (CD one)) (PP (IN of) (NP (NNP M) (CC and) (NNP N)))))
    )
  )
)
In [ ]: