Eisner Algorithm for Dependency Parsing

1. Dependency Parsing

dep-tree-example.png

1.1 Projective & Non-projective

Definition of projective: every word and its descendants form a contiguous substring of the sentence.

Examples of non-projective trees:

non-proj-example.png (From McDonald et al. [2005])
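Projectivity can be checked directly from the definition: for every edge, each word strictly between the head and the child must be a descendant of the head. The sketch below is a hypothetical helper (not part of the parser itself), assuming the tree is encoded as a head array where `heads[i]` is the index of word `i`'s head and the root has head `-1`.

```python
def is_projective(heads):
    """Check projectivity from the definition.

    `heads[i]` is the index of word i's head; the root has head -1.
    For every edge (h, c), every word strictly between h and c must
    be a descendant of h.
    """
    n = len(heads)
    for c in range(n):
        h = heads[c]
        if h < 0:
            continue  # skip the root
        lo, hi = (h, c) if h < c else (c, h)
        for k in range(lo + 1, hi):
            # Walk up from k; it must reach h before leaving the tree.
            a = k
            while a >= 0 and a != h:
                a = heads[a]
            if a != h:
                return False
    return True
```

For example, `is_projective([-1, 0, 1])` is `True` (a chain), while `is_projective([-1, 3, 0, 0])` is `False`: the edge from word 3 to word 1 spans word 2, which is not a descendant of word 3.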

1.2 Edge-Based Factorization

The score of a dependency tree $y$ for sentence $x$ is

$$s(x, y) = \sum_{(i, j) \in y}{s(i, j)}$$

Parsing is to find the tree $y^*$ with the highest score for a given sentence $x$. $$y^* = \arg \max_{y}{s(x, y)} $$
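Under this factorization, scoring a tree is just summing its edge weights. A quick sketch, assuming `weight[h, c]` stores the score of the edge from head `h` to child `c` (the convention the implementation below also uses):

```python
import numpy as np

def tree_score(weight, edges):
    """Sum of edge scores, where weight[h, c] is the score of the
    edge from head h to child c."""
    return sum(weight[h, c] for (h, c) in edges)

# Toy example: ROOT (index 0) plus two words.
weight = np.array([[0., 100., 50.],
                   [0., 0., 4.],
                   [0., 11., 0.]])
tree_score(weight, {(0, 1), (0, 2)})  # 150.0
```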

2. Eisner Algorithm

2.1 Span

spans.png

  1. The top-left "trapezoid" is an incomplete right span $(i, j, R, I)$: a parse tree rooted at $x_i$ whose rightmost child is $x_j$.

  2. The top-right "triangle" is a complete right span $(i, j, R, C)$: a parse tree rooted at $x_i$ covering $x_i \dots x_j$, whose rightmost child is some $x_k$ ($ i \lt k \leq j$).

  3. The bottom-left "trapezoid" is an incomplete left span $(i, j, L, I)$: a parse tree rooted at $x_j$ whose leftmost child is $x_i$.

  4. The bottom-right "triangle" is a complete left span $(i, j, L, C)$: a parse tree rooted at $x_j$ covering $x_i \dots x_j$, whose leftmost child is some $x_k$ ($ i \leq k \lt j$).

In [1]:
import numpy as np

# Some constants
L, R = 0, 1
I, C = 0, 1
DIRECTIONS = (L, R)
COMPLETENESS = (I, C)
NEG_INF = -float('inf')


class Span(object):
    def __init__(self, left_idx, right_idx, head_side, complete):
        self.data = (left_idx, right_idx, head_side, complete)

    @property
    def left_idx(self):
        return self.data[0]

    @property
    def right_idx(self):
        return self.data[1]

    @property
    def head_side(self):
        return self.data[2]

    @property
    def complete(self):
        return self.data[3]

    def __str__(self):
        return "({}, {}, {}, {})".format(
            self.left_idx,
            self.right_idx,
            "L" if self.head_side == L else "R",
            "C" if self.complete == C else "I",
        )

    def __repr__(self):
        return self.__str__()

    def __hash__(self):
        return hash(self.data)

    def __eq__(self, other):
        return isinstance(other, Span) and hash(other) == hash(self)

2.2 Algorithm

2.2.1 Base Case

base.png

2.2.2 Recursion

rules.png

  1. $ s(i,j,R,I) = \max_{i \leq k \lt j}{\{s(i, k, R, C) + s(k+1, j, L, C)\} + W_{ij}}$

  2. $ s(i,j,L,I) = \max_{i \leq k \lt j}{\{s(i, k, R, C) + s(k+1, j, L, C)\} + W_{ji}}$

  3. $ s(i,j,R,C) = \max_{i \lt k \leq j}{\{s(i, k, R, I) + s(k, j, R, C)\}}$

  4. $ s(i,j,L,C) = \max_{i \leq k \lt j}{\{s(i, k, L, C) + s(k, j, L, I)\}}$

2.2.3 Final Solution

top.png

In [2]:
def eisner(weight):
    """
    Find the maximum-scoring projective dependency tree.

    `N` denotes the length of the sentence, including the ROOT token at
    position 0.

    :param weight: an N x N matrix; weight[h, c] is the score of the edge
        from head h to child c
    :return: the set of (head, child) edges of the highest-scoring
        projective tree
    """
    N = weight.shape[0]

    btp = {}  # Back-track pointer
    dp_s = {}

    # Init
    for i in range(N):
        for j in range(i + 1, N):
            for dir in DIRECTIONS:
                for comp in COMPLETENESS:
                    dp_s[Span(i, j, dir, comp)] = NEG_INF

    # base case
    for i in range(N):
        for dir in DIRECTIONS:
            dp_s[Span(i, i, dir, C)] = 0.
            btp[Span(i, i, dir, C)] = None

    rules = [
        # span_shape_tuple := (span_direction, span_completeness),
        # rule := (span_shape, (left_subspan_shape, right_subspan_shape))
        ((L, I), ((R, C), (L, C))),
        ((R, I), ((R, C), (L, C))),
        ((L, C), ((L, C), (L, I))),
        ((R, C), ((R, I), (R, C))),
    ]

    for size in range(1, N):
        for i in range(0, N - size):
            j = i + size
            for rule in rules:
                ((dir, comp), ((l_dir, l_comp), (r_dir, r_comp))) = rule

                if comp == I:
                    edge_w = weight[i, j] if (dir == R) else weight[j, i]
                    k_start, k_end = (i, j)
                    offset = 1
                else:
                    edge_w = 0.
                    k_start, k_end = (i + 1, j + 1) if dir == R else (i, j)
                    offset = 0

                span = Span(i, j, dir, comp)
                for k in range(k_start, k_end):
                    l_span = Span(i, k, l_dir, l_comp)
                    r_span = Span(k + offset, j, r_dir, r_comp)
                    s = edge_w + dp_s[l_span] + dp_s[r_span]
                    if s > dp_s[span]:
                        dp_s[span] = s
                        btp[span] = (l_span, r_span)

    # recover tree
    return back_track(btp, Span(0, N - 1, R, C), set())


def back_track(btp, span, edge_set):
    if span.complete == I:
        # Each incomplete span contributes exactly one dependency edge.
        if span.head_side == L:
            edge = (span.right_idx, span.left_idx)
        else:
            edge = (span.left_idx, span.right_idx)
        edge_set.add(edge)

    if btp[span] is not None:
        l_span, r_span = btp[span]

        back_track(btp, l_span, edge_set)
        back_track(btp, r_span, edge_set)

    return edge_set
In [3]:
from nltk.parse.dependencygraph import DependencyGraph

def edges_to_dg(edge_set, words):
    # Every word has exactly one head, so the number of edges
    # equals the number of words.
    N = len(edge_set)
    dg = DependencyGraph()
    for i in range(1, N + 1):
        dg.add_node({'address': i, 'word': words[i-1]})
    for (h, c) in edge_set:
        dg.add_arc(h, c)
    dg.nodes[0]['word'] = 'ROOT'
    return dg
In [4]:
def test_case_1():
    weight = np.array(
        [
            [0, 100, 50],
            [0, 0, 4],
            [0, 11, 0]
        ]

    )

    return eisner(weight)
    
edges_to_dg(test_case_1(), ['A', 'B'])
Out[4]:
(rendered dependency graph: ROOT -> A, ROOT -> B)
In [5]:
def test_case_2():
    weight = np.array(
        [
            [0, 100, 20, 10, 1],
            [0, 0, 4, 3, 1],
            [0, 11, 0, 50, 2],
            [0, 22, 4, 0, 5],
            [0, 5, 6, 1, 0]
        ]

    )

    return eisner(weight)
    
edges_to_dg(test_case_2(), ['A', 'B', 'C', 'D'])
Out[5]:
(rendered dependency graph: ROOT -> A, ROOT -> B, B -> C, C -> D)

2.2.4 Complexity

Time complexity: $O(n^3)$. There are $O(n^2)$ spans, and each rule scans $O(n)$ split points $k$.

Space complexity: $O(n^2)$. The chart stores a score and a back-pointer for each span.
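The cubic bound can be sanity-checked by counting the (span, split-point) combinations the inner loops visit. `eisner_work` below is a hypothetical counting helper, not part of the parser; it mirrors the loop structure of `eisner` (four rules, each scanning `size` split points per span).

```python
def eisner_work(n):
    """Count the (rule, i, j, k) combinations visited by the recursion
    for a sentence of length n."""
    ops = 0
    for size in range(1, n):
        for i in range(n - size):
            # 4 rules, each scanning `size` split points for this span.
            ops += 4 * size
    return ops

[eisner_work(n) for n in (10, 20, 40)]  # [660, 5320, 42640]
```

Doubling the sentence length multiplies the count by roughly 8, consistent with $O(n^3)$.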