CYK Algorithm

pic1.jpg pcfg.jpg example_trees.jpg

Equation

cky_eq.jpg

Example

inside1.jpg eq2.jpg inside2.jpg eq2.jpg inside3.jpg

In [1]:
nonterminals = {'S', 'NP', 'VP', 'PP', 'P', 'V'}
rules = {('S', 'NP', 'VP') : 1.0, 
         ('PP', 'P', 'NP') : 1.0,
         ('VP', 'V', 'NP') : 0.7,
         ('VP', 'VP', 'PP') : 0.3,
         ('P', 'with') : 1.0,
         ('V', 'saw') : 1.0,
         ('NP', 'NP', 'PP'): 0.4,
         ('NP', 'astronomers'): 0.1,
         ('NP', 'ears'): 0.18, 
         ('NP', 'saw'): 0.04, 
         ('NP', 'stars'): 0.18, 
         ('NP', 'telescopes'): 0.1,}
raw_sentence = 'astronomers saw stars with ears'
In [2]:
sentence = raw_sentence.split(' ')
sentence_len = len(sentence)
score_holder = {i : {} for i in [(i, j) for i in range(sentence_len) for j in range(sentence_len) if i <= j]}
print(score_holder)
{(0, 0): {}, (0, 1): {}, (0, 2): {}, (0, 3): {}, (0, 4): {}, (1, 1): {}, (1, 2): {}, (1, 3): {}, (1, 4): {}, (2, 2): {}, (2, 3): {}, (2, 4): {}, (3, 3): {}, (3, 4): {}, (4, 4): {}}

eq2.jpg

In [3]:
# calculate cyk score
for row in range(sentence_len):
    for column in range(sentence_len - row):
        if row == 0:
            for rule in rules.keys():
                if rule[1] == sentence[column]:
                    score_holder[(column, column)][rule[0]] = (rule, (column, ), rules[rule])
        else:
            for k in range(column, column + row):
                left_children = score_holder[(column, k)].keys()
                if not bool(left_children): continue
                right_children = score_holder[(k + 1, column + row)].keys()
                if not bool(right_children): continue
                for left_child in left_children:
                    for right_child in right_children:
                        for parent in nonterminals:
                            if (parent, left_child, right_child) in rules:
                                current_score = score_holder[(column, k)][left_child][2] * \
                                                score_holder[(k + 1, column + row)][right_child][2] * \
                                                rules[(parent, left_child, right_child)]

                                if parent in score_holder[(column, column + row)]:

                                    if score_holder[(column, column + row)][parent][2] < current_score:
                                        
                                        score_holder[(column, column + row)][parent] = ((parent, left_child, right_child),
                                                                                        (column, k, column + row),
                                                                                        current_score)
                                    else:
                                        pass
                                else:
                                    score_holder[(column, column + row)][parent] = ((parent, left_child, right_child),
                                                                                    (column, k, column + row),
                                                                                    current_score)
print(score_holder[(0, sentence_len - 1)]['S'])
(('S', 'NP', 'VP'), (0, 0, 4), 0.0009071999999999999)
In [4]:
def draw_tree(holder, span, nontemrinal, layer=0):
    rule, children_span, _ = holder[span][nontemrinal]
    if len(rule) != 3:
        return '  '*layer + '(' + rule[0] + '|' + rule[1] +  ')\n'
    string = '  '*layer + '(' + nontemrinal + '\n'
    left_span = (children_span[0], children_span[1])
    right_span = (children_span[1]+1, children_span[2])
    left_child, right_child = rule[1:]
    string += draw_tree(holder, left_span, left_child, layer=layer+1)
    string += draw_tree(holder, right_span, right_child, layer=layer+1)
    return string
print(draw_tree(score_holder, (0, sentence_len-1), 'S', layer=0))
(S
  (NP|astronomers)
  (VP
    (V|saw)
    (NP
      (NP|stars)
      (PP
        (P|with)
        (NP|ears)

In [ ]: