Package nltk_lite :: Package contrib :: Package mit :: Package six863 :: Package kimmo :: Module morphology
[hide private]
[frames] | no frames]

Source Code for Module nltk_lite.contrib.mit.six863.kimmo.morphology

  1  from fsa import FSA 
  2  import yaml 
  3  from featurelite import unify 
  4   
class YAMLwrapper(object):
    """Hold a YAML string and parse it lazily, at most once.

    The raw text is kept in ``yamlstr``; the first call to ``value()``
    parses it and later calls return the cached result.
    """

    def __init__(self, yamlstr):
        """Store *yamlstr* unparsed; parsing is deferred until ``value()``."""
        self.yamlstr = yamlstr
        self._cache = None
        # A separate flag is needed because None is itself a legal parse
        # result; using None alone as the "not parsed yet" marker would
        # re-parse such a document on every call.
        self._parsed = False

    def value(self):
        """Return the parsed value of ``yamlstr``, parsing it on first use."""
        if self._cache is None and not self._parsed:
            # NOTE(security): the full Loader can construct arbitrary Python
            # objects, so the YAML text must come from a trusted source.
            # It is named explicitly because it matches what yaml.load() did
            # by default when this module was written, and newer PyYAML
            # releases refuse to run yaml.load() without a Loader argument.
            self._cache = yaml.load(self.yamlstr, Loader=yaml.Loader)
            self._parsed = True
        return self._cache
13
def combine_features(a, b):
    """
    Return an object that combines the feature labels a and b.

    Any argument that is a YAMLwrapper is first replaced by its parsed
    value.  If both labels are then plain strings they are concatenated;
    otherwise the pair is handed to featurelite's unify() with a fresh,
    empty bindings dictionary and a `fail` callback that returns its
    second argument.
    """
    def override_features(a, b):
        # Passed to unify() as its `fail` callback; it always returns its
        # second argument.  Presumably unify() calls it on a clash so that
        # b's value wins -- confirm against featurelite.
        return b

    if isinstance(a, YAMLwrapper):
        a = a.value()
    if isinstance(b, YAMLwrapper):
        b = b.value()

    if isinstance(a, str) and isinstance(b, str):
        return a + b

    bindings = {}
    return unify(a, b, bindings, fail=override_features)
class KimmoMorphology(object):
    """A morphological lexicon stored as a finite-state automaton.

    Transitions out of a state are labelled either ``None`` or a
    ``(word, features)`` pair; see ``from_text`` for how they are built
    from a lexicon description.
    """

    def __init__(self, fsa):
        """Wrap an already-built automaton *fsa*."""
        self._fsa = fsa

    def fsa(self):
        """Return the underlying automaton."""
        return self._fsa

    def valid_lexical(self, state, word, alphabet):
        """Yield input symbols that could extend *word* from *state*.

        For every labelled transition out of *state* whose word strictly
        extends *word*, yield ``pair.input()`` for each pair in *alphabet*
        whose input is a prefix of the remaining characters.  The same
        symbol may be yielded more than once.
        """
        transitions = self.fsa()._transitions[state]
        for label in transitions:
            if label is None:
                continue
            entry = label[0]
            if entry.startswith(word) and len(entry) > len(word):
                remainder = entry[len(word):]
                for pair in alphabet:
                    symbol = pair.input()
                    if remainder.startswith(symbol):
                        yield symbol

    def next_states(self, state, word):
        """Yield ``(next_state, features)`` for transitions matching *word*.

        ``None``-labelled transitions match only the empty word and yield
        ``None`` as their features; labelled transitions match when their
        word equals *word* exactly.
        """
        choices = self.fsa()._transitions[state]
        for key, targets in choices.items():
            if key is None:
                if word == '':
                    for target in targets:
                        yield (target, None)
            elif key[0] == word:
                for target in targets:
                    yield (target, key[1])

    @staticmethod
    def load(filename):
        """Read the lexicon file *filename* and build a KimmoMorphology."""
        # The with-block closes the file even if reading raises.
        with open(filename) as f:
            text = f.read()
        return KimmoMorphology.from_text(text)

    @staticmethod
    def _unquote(word):
        """Return the value of *word* if it is a quoted literal, else *word*."""
        # Require the same quote character at both ends.  The previous test
        # (`starts with " or starts with ' and ends with '`) evaluated any
        # word that merely started with a double quote.
        if len(word) >= 2 and word[0] in '\'"' and word[-1] == word[0]:
            # NOTE(security): eval() runs arbitrary expressions; lexicon
            # text must come from a trusted source.
            return eval(word)
        return word

    @staticmethod
    def from_text(text):
        """Build a KimmoMorphology from the text of a lexicon description.

        Line forms (blank lines and lines starting with ';' are skipped):

        - ``Name:`` makes ``Name`` the current state.
        - ``Name: S1 S2 ...`` inserts a ``None``-labelled transition from
          ``Name`` to each listed state.
        - ``word next features`` inserts a ``(word, features)`` transition
          from the current state to ``next``.  ``features`` is the rest of
          the line: the text ``None`` becomes ``None``, text starting with
          a quote or ``{`` is wrapped in a YAMLwrapper, and anything else
          is kept as a string.
        - ``word next`` is the same with the empty string as features.

        Any other line is reported with a printed message and ignored.
        """
        fsa = FSA([], {}, 'Begin', ['End'])
        state = 'Begin'
        for line in text.split('\n'):
            line = line.strip()
            if not line or line.startswith(';'):
                continue
            if line.endswith(':'):
                state = line[:-1]
                continue

            fields = line.split()
            if fields[0].endswith(':'):
                source = fields[0][:-1]
                for target in fields[1:]:
                    fsa.insert_safe(source, None, target)
            elif len(fields) > 2:
                # this is a lexicon entry; features keeps its inner spaces
                word, target, features = line.split(None, 2)
                word = KimmoMorphology._unquote(word)
                if features == 'None':
                    features = None
                elif features[0] in '\'"{':
                    features = YAMLwrapper(features)
                fsa.insert_safe(state, (word, features), target)
            elif len(fields) == 2:
                word, target = fields
                if word == "''":
                    word = ''
                fsa.insert_safe(state, (word, ''), target)
            else:
                print("Ignoring line in morphology: %r" % line)
        return KimmoMorphology(fsa)
99
def demo():
    """Load the lexicon at the relative path 'english.lex' and print it."""
    # Single-argument print(...) behaves the same on Python 2 and parses
    # on Python 3, unlike the bare print statement.
    print(KimmoMorphology.load('english.lex'))
# Script entry point: run the demo only when this module is executed
# directly, not when it is imported.
if __name__ == '__main__':
    demo()