Natural language parsing

To parse a sentence, we run the Stanford parser to obtain its parse tree, match patterns against the subtrees, and use combinators for translation.

import $ivy.`io.github.siddhartha-gadgil::provingground-nlp:0.1.0`
import $ivy.`edu.stanford.nlp:stanford-corenlp:3.7.0`

import $ivy.$                                                     

import $ivy.$

// Load the "models" classifier artifact of stanford-corenlp 3.7.0 into the
// session, in addition to the main stanford-corenlp jar imported via $ivy.
interp.load.ivy(
  coursier.Dependency(
    module = coursier.Module("edu.stanford.nlp", "stanford-corenlp"),
    version = "3.7.0",
    attributes = coursier.Attributes(classifier = "models")
  )
)

import edu.stanford._

import edu.stanford._

import edu.stanford.nlp._

import edu.stanford.nlp._

import trees.Tree
import simple._

import trees.Tree

import simple._

// The example statement, wrapped in a CoreNLP `simple.Sentence`.
val sent: Sentence = new Sentence("if a prime number P divides MN, P divides one of M and N")

sent: Sentence = if a prime number P divides MN, P divides one of M and N

// Parse the sentence into a `Tree`.
val tree: Tree = sent.parse()

tree: Tree = (ROOT (S (SBAR (IN if) (S (NP (DT a) (JJ prime) (NN number) (NN P)) (VP (VBZ divides) (NP (NNP MN))))) (, ,) (NP (NNP P)) (VP (VBZ divides) (NP (NP (CD one)) (PP (IN of) (NP (NNP M) (CC and) (NNP N)))))))

import provingground._, translation._

import provingground._, translation._

import TreePatterns._

import TreePatterns._

import scala.collection.JavaConversions._

import scala.collection.JavaConversions._

// All subtrees of the parse tree, as a Scala List.
// `subTrees` yields a Java collection; convert it explicitly with `.asScala`
// rather than relying on the deprecated implicit `JavaConversions`. The import
// is scoped to this block so it does not leak into the rest of the session.
val st: List[Tree] = {
  import scala.collection.JavaConverters._
  tree.subTrees.asScala.toList
}

st: List[Tree] = List(
  (, ,),
  (NP (NP (CD one)) (PP (IN of) (NP (NNP M) (CC and) (NNP N)))),
  (NNP P),
  (CD one),
  M,
  N,
  (NN P),
  P,
  (NN number),
  number,
  (VP (VBZ divides) (NP (NNP MN))),
  (VP (VBZ divides) (NP (NP (CD one)) (PP (IN of) (NP (NNP M) (CC and) (NNP N))))),
  (NP (DT a) (JJ prime) (NN number) (NN P)),
  (NP (NNP MN)),
  (NP (NNP P)),
  divides,
  and,
  (ROOT (S (SBAR (IN if) (S (NP (DT a) (JJ prime) (NN number) (NN P)) (VP (VBZ divides) (NP (NNP MN))))) (, ,) (NP (NNP P)) (VP (VBZ divides) (NP (NP (CD one)) (PP (IN of) (NP (NNP M) (CC and) (NNP N))))))),
  of,
  (NP (NNP M) (CC and) (NNP N)),
  (NNP M),
  if,
  (NNP N),
  (JJ prime),
  prime,
  a,
  MN,
  (PP (IN of) (NP (NNP M) (CC and) (NNP N))),
  (NP (CD one)),
  (SBAR (IN if) (S (NP (DT a) (JJ prime) (NN number) (NN P)) (VP (VBZ divides) (NP (NNP MN))))),
  (S (NP (DT a) (JJ prime) (NN number) (NN P)) (VP (VBZ divides) (NP (NNP MN)))),
  one,
  ,,
...

// Try the `IfTree` extractor on every subtree and keep only the successful
// matches (subtrees for which `unapply` returns `None` are dropped).
val matches = st.flatMap(subtree => IfTree.unapply(subtree))

matches: List[(cats.package.Id[Tree], Vector[Tree])] = List(
  (
    (S (NP (DT a) (JJ prime) (NN number) (NN P)) (VP (VBZ divides) (NP (NNP MN)))),
    Vector(
      (NP (NNP P)),
      (VP (VBZ divides) (NP (NP (CD one)) (PP (IN of) (NP (NNP M) (CC and) (NNP N)))))
    )
  )
)