# coding: utf-8
# python 3.6.4 x86_64
import sys
import re
# Patterns for "<verb phrase> <article> <word>" constructions.
# Group 1 is the verb phrase; group 2 is the run of non-whitespace characters
# that follows the article (it may include trailing punctuation, e.g.
# "scientist.").
#
# The article alternatives are wrapped in a non-capturing group `(?:...)`.
# Writing them inside square brackets would create a character class that
# matches a single character, so multi-letter articles such as "the" would
# never match.
#
# The first pattern additionally accepts "in"; the second and third are
# identical to each other and are both kept so the list keeps its length.
l_regex = [
    re.compile(r"(is|was|were|are|mean|means|will be|to be|can be)\s(?:a|the|an|The|in)\s(\S*)"),
    re.compile(r"(is|was|were|are|mean|means|will be|to be|can be)\s(?:a|the|an|The)\s(\S*)"),
    re.compile(r"(is|was|were|are|mean|means|will be|to be|can be)\s(?:a|the|an|The)\s(\S*)"),
]
# Sample sentences; the loop at the bottom of the script passes each one to
# extractType.
page = [
    "Toronto is the largest city in Canada.",
    "Albert Einstein was a scientist.",
    "Paris is the capital of France.",
    "Paris is the big city in France.",
]
def extractType(page, regex, ln=None):
    """Return the second capture group of the first pattern that matches.

    The patterns are tried from the last considered one back to the first,
    so later entries in ``regex`` take priority over earlier ones.

    Args:
        page: The text to search.
        regex: A sequence of compiled patterns, each with at least two
            capture groups.
        ln: How many leading patterns of ``regex`` to consider. Defaults to
            all of them.

    Returns:
        ``group(2)`` of the first successful search, or ``None`` when no
        considered pattern matches (including when ``ln`` is 0 or ``regex``
        is empty).
    """
    # A default cannot refer to another parameter (defaults are evaluated
    # when the function is defined), so the length is resolved here.
    if ln is None:
        ln = len(regex)

    # Clamp so a negative count means "no patterns" instead of being
    # interpreted as a from-the-end slice.
    for pattern in reversed(regex[:max(ln, 0)]):
        m = pattern.search(page)
        if m:
            return m.group(2)
    return None
# Print the extracted word (or None) for every sample sentence.
# The loop variable is named `sentence` so the builtin `str` is not shadowed.
for sentence in page:
    print(extractType(sentence, l_regex))