如果要重复执行此操作,应创建一个索引:
wordlist = [word.strip() for word in "run, ran, rat, rob, fish, tree".split(',')]
from collections import defaultdict
class Index(object):
    """Positional index over a word list for fixed-length wildcard lookups.

    Each word is stored in ``self.trie`` under its length (an ``int`` key)
    and under one ``(position, character)`` tuple key per character. A
    pattern is then answered by intersecting those buckets instead of
    scanning every word.
    """

    def __init__(self, wordlist=()):
        """Build the index from an iterable of words (empty by default)."""
        self.trie = defaultdict(set)
        for word in wordlist:
            self.add_word(word)

    def add_word(self, word):
        """Add ``word`` to the index."""
        # Bucket by length so a lookup can start from only the words that
        # have as many characters as the pattern.
        self.trie[len(word)].add(word)
        for marker in enumerate(word):
            # marker is a (position, character) tuple.
            self.trie[marker].add(word)

    def find(self, pattern, wildcard='-'):
        """Return the indexed words that match ``pattern``.

        A word matches when it has the same length as ``pattern`` and
        agrees with it at every position whose character is not
        ``wildcard``.

        Returns a new set that the caller may modify freely. Returns
        ``None`` when a non-empty pattern matches nothing. An empty
        pattern returns the (possibly empty) set of indexed empty strings.
        """
        # Read with .get() so a miss does not insert an empty bucket into
        # the defaultdict, and copy the bucket so that the intersections
        # below never shrink the data stored in the index.
        candidates = set(self.trie.get(len(pattern), ()))
        for marker in enumerate(pattern):
            if marker[1] != wildcard:
                candidates.intersection_update(self.trie.get(marker, ()))
            # Exit early once no candidate is left.
            if not candidates:
                return None
        return candidates
# Build the index from dict.txt (one word per line) and run a sample query.
with open('dict.txt', 'rt') as lines:
    wordlist = [word.strip() for word in lines]
s = Index(wordlist)
# A parenthesised single-argument print is valid as both the Python 2
# statement and the Python 3 function, and prints the same value.
print(s.find("r--"))
Trie 用于搜索字符串。这是一个简单的前缀 trie,使用一个 dict