6 """
7 Maximum Entropy code.
8
9 Uses Improved Iterative Scaling:
10 XXX ref
11
12 # XXX need to define terminology
13
14 """

import numpy


# Stop conditions for the outer Improved Iterative Scaling (IIS) loop.
MAX_IIS_ITERATIONS = 10000
IIS_CONVERGE = 1E-5
# Stop conditions for the inner Newton's method solver.
MAX_NEWTON_ITERATIONS = 100
NEWTON_CONVERGE = 1E-10

class MaxEntropy:
    """Holds information for a Maximum Entropy classifier.

    Members:
    classes      List of the possible classes of data.
    alphas       List of the weights for each feature.
    feature_fns  List of the feature functions.

    """
    def __init__(self):
        self.classes = []
        self.alphas = []
        self.feature_fns = []

def calculate(me, observation):
    """calculate(me, observation) -> list of log probs

    Calculate the log of the probability for each class. me is a
    MaxEntropy object that has been trained. observation is a vector
    representing the observed data. The return value is a list of
    unnormalized log probabilities for each class.

    """
    scores = []
    for klass in range(len(me.classes)):
        lprob = 0.0
        for fn, alpha in zip(me.feature_fns, me.alphas):
            lprob += fn(observation, klass) * alpha
        scores.append(lprob)
    return scores

def classify(me, observation):
    """classify(me, observation) -> class

    Classify an observation into the class with the highest score.

    """
    scores = calculate(me, observation)
    return me.classes[scores.index(max(scores))]
70 """_eval_feature_fn(fn, xs, classes) -> dict of values
71
72 Evaluate a feature function on every instance of the training set
73 and class. fn is a callback function that takes two parameters: a
74 training instance and a class. Return a dictionary of (training
75 set index, class index) -> non-zero value. Values of 0 are not
76 stored in the dictionary.
77
78 """
    values = {}
    for i in range(len(xs)):
        for j in range(len(classes)):
            f = fn(xs[i], classes[j])
            if f != 0:
                values[(i, j)] = f
    return values
88 """_calc_empirical_expects(xs, ys, classes, features) -> list of expectations
89
90 Calculate the expectation of each function from the data. This is
91 the constraint for the maximum entropy distribution. Return a
92 list of expectations, parallel to the list of features.
93
94 """
    # Map each class to its index, so ys can be turned into indexes.
    class2index = {}
    for index, key in enumerate(classes):
        class2index[key] = index
    ys_i = [class2index[y] for y in ys]

    expect = []
    N = len(xs)
    for feature in features:
        s = 0
        for i in range(N):
            s += feature.get((i, ys_i[i]), 0)
        expect.append(float(s) / N)
    return expect

def _calc_model_expects(xs, classes, features, alphas):
    """_calc_model_expects(xs, classes, features, alphas) -> list of expectations.

    Calculate the expectation of each feature from the model. This is
    not used in maximum entropy training, but provides a good function
    for debugging.

    """
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)

    expects = []
    for feature in features:
        s = 0.0
        for (i, j), f in feature.items():
            s += p_yx[i][j] * f
        expects.append(s / len(xs))
    return expects

def _calc_p_class_given_x(xs, classes, features, alphas):
    """_calc_p_class_given_x(xs, classes, features, alphas) -> matrix

    Calculate P(y|x), where y is the class and x is an instance from
    the training set. Return an XS x CLASSES matrix of probabilities.

    """
    prob_yx = numpy.zeros((len(xs), len(classes)))

    # Calculate the unnormalized log score of each (x, y) pair.
    for feature, alpha in zip(features, alphas):
        for (x, y), f in feature.items():
            prob_yx[x][y] += alpha * f
    # Exponentiate to get the unnormalized P(y, x).
    prob_yx = numpy.exp(prob_yx)
    # Normalize over the classes for each x, giving P(y|x).
    for i in range(len(xs)):
        z = sum(prob_yx[i])
        prob_yx[i] = prob_yx[i] / z

    return prob_yx

def _calc_f_sharp(N, nclasses, features):
    """_calc_f_sharp(N, nclasses, features) -> matrix of f sharp values."""
    f_sharp = numpy.zeros((N, nclasses))
    for feature in features:
        for (i, j), f in feature.items():
            f_sharp[i][j] += f
    return f_sharp

def _iis_solve_delta(N, feature, f_sharp, empirical, prob_yx):
    """Solve the IIS update delta for one feature (PRIVATE)."""
    delta = 0.0
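    # Newton's method solves the IIS update equation for delta:
    #     E_empirical[f] = (1/N) * SUM[P(y|x) * f(x, y) * exp(delta * f#(x, y))]
    # f_newton below is the residual of this equation and df_newton is its
    # derivative with respect to delta.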
    iters = 0
    while iters < MAX_NEWTON_ITERATIONS:
        f_newton = df_newton = 0.0
        for (i, j), f in feature.items():
            prod = prob_yx[i][j] * f * numpy.exp(delta * f_sharp[i][j])
            f_newton += prod
            df_newton += prod * f_sharp[i][j]
        f_newton, df_newton = empirical - f_newton / N, -df_newton / N

        ratio = f_newton / df_newton
        delta -= ratio
        if numpy.fabs(ratio) < NEWTON_CONVERGE:
            break
        iters += 1
    else:
        raise RuntimeError("Newton's method did not converge")
    return delta

def _train_iis(xs, classes, features, f_sharp, alphas, e_empirical):
    """Do one iteration of hill climbing to find better alphas (PRIVATE)."""
    p_yx = _calc_p_class_given_x(xs, classes, features, alphas)

    N = len(xs)
    newalphas = alphas[:]
    for i in range(len(alphas)):
        delta = _iis_solve_delta(N, features[i], f_sharp, e_empirical[i], p_yx)
        newalphas[i] += delta
    return newalphas

def train(training_set, results, feature_fns, update_fn=None):
    """Train a maximum entropy classifier, returning a MaxEntropy object.

    Train a maximum entropy classifier on a training set.
    training_set is a list of observations. results is a list of the
    class assignments for each observation. feature_fns is a list of
    the features. These are callback functions that take an
    observation and class and return a 1 or 0. update_fn is a
    callback function that is called at each training iteration. It is
    passed a MaxEntropy object that encapsulates the current state of
    the training.

    """
    if not training_set:
        raise ValueError("No data in the training set.")
    if len(training_set) != len(results):
        raise ValueError("training_set and results should be parallel lists.")

    xs, ys = training_set, results

    # Get a sorted list of all the classes that can be trained.
    classes = sorted(set(results))

    # Evaluate each feature function on every (instance, class) pair.
    features = [_eval_feature_fn(fn, training_set, classes)
                for fn in feature_fns]

    # Cache the f# values, used by the Newton step of each IIS update.
    f_sharp = _calc_f_sharp(len(training_set), len(classes), features)

    # Pre-calculate the empirical expectations of the features.
    e_empirical = _calc_empirical_expects(xs, ys, classes, features)

    # Starting with all weights at 0, run IIS until the alphas converge.
    alphas = [0.0] * len(features)
    iters = 0
    while iters < MAX_IIS_ITERATIONS:
        nalphas = _train_iis(xs, classes, features, f_sharp,
                             alphas, e_empirical)
        diff = sum(numpy.fabs(x - y) for x, y in zip(alphas, nalphas))
        alphas = nalphas

        me = MaxEntropy()
        me.alphas, me.classes, me.feature_fns = alphas, classes, feature_fns
        if update_fn is not None:
            update_fn(me)

        if diff < IIS_CONVERGE:
            break
        iters += 1
    else:
        raise RuntimeError("IIS did not converge")

    return me

if __name__ == "__main__":
    # Toy demo: each observation is a [color, type, origin] vector for a
    # car, with a 'Yes'/'No' class label. Train on the data, then check
    # the predictions on the same data.
    xcar = [
        ['Red', 'Sports', 'Domestic'],
        ['Red', 'Sports', 'Domestic'],
        ['Red', 'Sports', 'Domestic'],
        ['Yellow', 'Sports', 'Domestic'],
        ['Yellow', 'Sports', 'Imported'],
        ['Yellow', 'SUV', 'Imported'],
        ['Yellow', 'SUV', 'Imported'],
        ['Yellow', 'SUV', 'Domestic'],
        ['Red', 'SUV', 'Imported'],
        ['Red', 'Sports', 'Imported'],
    ]
285
286 ycar=[
287 'Yes',
288 'No',
289 'Yes',
290 'No',
291 'Yes',
292 'No',
293 'Yes',
294 'No',
295 'No',
296 'Yes'
297 ]

    # Feature functions take an (observation, class) pair and return 0 or 1.
    def udf1(ts, cl):
        if ts[0] == 'Red':
            return 0
        else:
            return 1

    def udf2(ts, cl):
        if ts[1] == 'Sports':
            return 0
        else:
            return 1

    def udf3(ts, cl):
        if ts[2] == 'Domestic':
            return 0
        else:
            return 1
    user_functions = [udf1, udf2, udf3]

    xe = train(xcar, ycar, user_functions)
    for xv, yv in zip(xcar, ycar):
        xc = classify(xe, xv)
        print("Pred:", xv, "gives", xc, "y is", yv)