nonterminals = {'S', 'NP', 'VP', 'PP', 'P', 'V'}
rules = {('S', 'NP', 'VP') : 1.0,
('PP', 'P', 'NP') : 1.0,
('VP', 'V', 'NP') : 0.7,
('VP', 'VP', 'PP') : 0.3,
('P', 'with') : 1.0,
('V', 'saw') : 1.0,
('NP', 'NP', 'PP'): 0.4,
('NP', 'astronomers'): 0.1,
('NP', 'ears'): 0.18,
('NP', 'saw'): 0.04,
('NP', 'stars'): 0.18,
('NP', 'telescopes'): 0.1,}
raw_sentence = 'astronomers saw stars with ears'
sentence = raw_sentence.split(' ')
sentence_len = len(sentence)
score_holder = {i : {} for i in [(i, j) for i in range(sentence_len) for j in range(sentence_len) if i <= j]}
print(score_holder)
# calculate cyk score
for row in range(sentence_len):
for column in range(sentence_len - row):
if row == 0:
for rule in rules.keys():
if rule[1] == sentence[column]:
score_holder[(column, column)][rule[0]] = (rule, (column, ), rules[rule])
else:
for k in range(column, column + row):
left_children = score_holder[(column, k)].keys()
if not bool(left_children): continue
right_children = score_holder[(k + 1, column + row)].keys()
if not bool(right_children): continue
for left_child in left_children:
for right_child in right_children:
for parent in nonterminals:
if (parent, left_child, right_child) in rules:
current_score = score_holder[(column, k)][left_child][2] * \
score_holder[(k + 1, column + row)][right_child][2] * \
rules[(parent, left_child, right_child)]
if parent in score_holder[(column, column + row)]:
if score_holder[(column, column + row)][parent][2] < current_score:
score_holder[(column, column + row)][parent] = ((parent, left_child, right_child),
(column, k, column + row),
current_score)
else:
pass
else:
score_holder[(column, column + row)][parent] = ((parent, left_child, right_child),
(column, k, column + row),
current_score)
print(score_holder[(0, sentence_len - 1)]['S'])
def draw_tree(holder, span, nontemrinal, layer=0):
rule, children_span, _ = holder[span][nontemrinal]
if len(rule) != 3:
return ' '*layer + '(' + rule[0] + '|' + rule[1] + ')\n'
string = ' '*layer + '(' + nontemrinal + '\n'
left_span = (children_span[0], children_span[1])
right_span = (children_span[1]+1, children_span[2])
left_child, right_child = rule[1:]
string += draw_tree(holder, left_span, left_child, layer=layer+1)
string += draw_tree(holder, right_span, right_child, layer=layer+1)
return string
print(draw_tree(score_holder, (0, sentence_len-1), 'S', layer=0))