Package Bio :: Package KDTree :: Module KDTree
[hide private]
[frames] | no frames]

Source Code for Module Bio.KDTree.KDTree

  1  """ 
  2  The KD tree data structure can be used for all kinds of searches that 
  3  involve N-dimensional vectors, e.g.  neighbor searches (find all points 
  4  within a radius of a given point) or finding all point pairs in a set 
  5  that are within a certain radius of each other. See "Computational Geometry:  
  6  Algorithms and Applications" (Mark de Berg, Marc van Kreveld, Mark Overmars,  
  7  Otfried Schwarzkopf). Author: Thomas Hamelryck. 
  8  """ 
  9   
 10  try: 
 11      import Numeric 
 12      from Numeric import sum, sqrt 
 13      from RandomArray import * 
 14  except ImportError: 
 15      raise ImportError, "This module requires Numeric (precursor to NumPy)" 
 16   
 17  import CKDTree  
 18   
19 -def _dist(p, q):
20 diff=p-q 21 return sqrt(sum(diff*diff))
22
23 -def _neighbor_test(nr_points, dim, bucket_size, radius):
24 """ Test all fixed radius neighbor search. 25 26 Test all fixed radius neighbor search using the 27 KD tree C module. 28 29 o nr_points - number of points used in test 30 o dim - dimension of coords 31 o bucket_size - nr of points per tree node 32 o radius - radius of search (typically 0.05 or so) 33 """ 34 # KD tree search 35 kdt=CKDTree.KDTree(dim, bucket_size) 36 coords=random((nr_points, dim)).astype("f") 37 kdt.set_data(coords, nr_points) 38 kdt.neighbor_search(radius) 39 r=kdt.neighbor_get_radii() 40 if r is None: 41 l1=0 42 else: 43 l1=len(r) 44 # now do a slow search to compare results 45 kdt.neighbor_simple_search(radius) 46 r=kdt.neighbor_get_radii() 47 if r is None: 48 l2=0 49 else: 50 l2=len(r) 51 if l1==l2: 52 print "Passed." 53 else: 54 print "Not passed: %i <> %i." % (l1, l2)
55
56 -def _test(nr_points, dim, bucket_size, radius):
57 """Test neighbor search. 58 59 Test neighbor search using the KD tree C module. 60 61 o nr_points - number of points used in test 62 o dim - dimension of coords 63 o bucket_size - nr of points per tree node 64 o radius - radius of search (typically 0.05 or so) 65 """ 66 # kd tree search 67 kdt=CKDTree.KDTree(dim, bucket_size) 68 coords=random((nr_points, dim)).astype("f") 69 center=coords[0] 70 kdt.set_data(coords, nr_points) 71 kdt.search_center_radius(center, radius) 72 r=kdt.get_indices() 73 if r is None: 74 l1=0 75 else: 76 l1=len(r) 77 l2=0 78 # now do a manual search to compare results 79 for i in range(0, nr_points): 80 p=coords[i] 81 if _dist(p, center)<=radius: 82 l2=l2+1 83 if l1==l2: 84 print "Passed." 85 else: 86 print "Not passed: %i <> %i." % (l1, l2)
87
88 -class KDTree:
89 """ 90 KD tree implementation (C++, SWIG python wrapper) 91 92 The KD tree data structure can be used for all kinds of searches that 93 involve N-dimensional vectors, e.g. neighbor searches (find all points 94 within a radius of a given point) or finding all point pairs in a set 95 that are within a certain radius of each other. 96 97 Reference: 98 99 Computational Geometry: Algorithms and Applications 100 Second Edition 101 Mark de Berg, Marc van Kreveld, Mark Overmars, Otfried Schwarzkopf 102 published by Springer-Verlag 103 2nd rev. ed. 2000. 104 ISBN: 3-540-65620-0 105 106 The KD tree data structure is described in chapter 5, pg. 99. 107 108 The following article made clear to me that the nodes should 109 contain more than one point (this leads to dramatic speed 110 improvements for the "all fixed radius neighbor search", see 111 below): 112 113 JL Bentley, "Kd trees for semidynamic point sets," in Sixth Annual ACM 114 Symposium on Computational Geometry, vol. 91. San Francisco, 1990 115 116 This KD implementation also performs a "all fixed radius neighbor search", 117 i.e. it can find all point pairs in a set that are within a certain radius 118 of each other. As far as I know the algorithm has not been published. 119 """ 120
121 - def __init__(self, dim, bucket_size=1):
122 self.dim=dim 123 self.kdt=CKDTree.KDTree(dim, bucket_size) 124 self.built=0
125 126 # Set data 127
128 - def set_coords(self, coords):
129 """Add the coordinates of the points. 130 131 o coords - two dimensional Numeric array of type "f". E.g. if the 132 points have dimensionality D and there are N points, the coords 133 array should be NxD dimensional. 134 """ 135 if min(coords)<=-1e6 or max(coords)>=1e6: 136 raise Exception, "Points should lie between -1e6 and 1e6" 137 if len(coords.shape)!=2 or coords.shape[1]!=self.dim: 138 raise Exception, "Expected a Nx%i Numeric array" % self.dim 139 if coords.typecode()!="f": 140 raise Exception, "Expected a Numeric array of type float" 141 self.kdt.set_data(coords, coords.shape[0]) 142 self.built=1
143 144 # Fixed radius search for a point 145
146 - def search(self, center, radius):
147 """Search all points within radius of center. 148 149 o center - one dimensional Numeric array of type "f". E.g. if the 150 points have dimensionality D, the center array should be D 151 dimensional. 152 o radius - float>0 153 """ 154 if not self.built: 155 raise Exception, "No point set specified" 156 if center.shape!=(self.dim,): 157 raise Exception, "Expected a %i-dimensional Numeric array" % self.dim 158 if center.typecode()!="f": 159 raise Exception, "Expected a Numeric array of type float" 160 self.kdt.search_center_radius(center, radius)
161
162 - def get_radii(self):
163 """Return radii. 164 165 Return the list of distances from center after 166 a neighbor search. 167 """ 168 a=self.kdt.get_radii() 169 if a is None: 170 return [] 171 return a
172
173 - def get_indices(self):
174 """Return the list of indices. 175 176 Return the list of indices after a neighbor search. 177 The indices refer to the original coords Numeric array. The 178 coordinates with these indices were within radius of center. 179 180 For an index pair, the first index<second index. 181 """ 182 a=self.kdt.get_indices() 183 if a is None: 184 return [] 185 return a
186 187 # Fixed radius search for all points 188 189
190 - def all_search(self, radius):
191 """All fixed neighbor search. 192 193 Search all point pairs that are within radius. 194 195 o radius - float (>0) 196 """ 197 if not self.built: 198 raise Exception, "No point set specified" 199 self.kdt.neighbor_search(radius)
200
201 - def all_get_indices(self):
202 """Return All Fixed Neighbor Search results. 203 204 Return a Nx2 dim Numeric array containing 205 the indices of the point pairs, where N 206 is the number of neighbor pairs. 207 """ 208 a=self.kdt.neighbor_get_indices() 209 if a is None: 210 return [] 211 # return as Nx2 dim Numeric array, where N 212 # is number of neighbor pairs. 213 a.shape=(-1, 2) 214 return a
215
216 - def all_get_radii(self):
217 """Return All Fixed Neighbor Search results. 218 219 Return an N-dim array containing the distances 220 of all the point pairs, where N is the number 221 of neighbor pairs.. 222 """ 223 a=self.kdt.neighbor_get_radii() 224 if a is None: 225 return [] 226 return a
227 228 if __name__=="__main__": 229 230 from RandomArray import * 231 232 nr_points=100000 233 dim=3 234 bucket_size=10 235 query_radius=10 236 237 coords=(200*random((nr_points, dim))).astype("f") 238 239 kdtree=KDTree(dim, bucket_size) 240 241 # enter coords 242 kdtree.set_coords(coords) 243 244 # Find all point pairs within radius 245 246 kdtree.all_search(query_radius) 247 248 # get indices & radii of points 249 250 # indices is a list of tuples. Each tuple contains the 251 # two indices of a point pair within query_radius of 252 # each other. 253 indices=kdtree.all_get_indices() 254 radii=kdtree.all_get_radii() 255 256 print "Found %i point pairs within radius %f." % (len(indices), query_radius) 257 258 # Do 10 individual queries 259 260 for i in range(0, 10): 261 # pick a random center 262 center=random(dim).astype("f") 263 264 # search neighbors 265 kdtree.search(center, query_radius) 266 267 # get indices & radii of points 268 indices=kdtree.get_indices() 269 radii=kdtree.get_radii() 270 271 x, y, z=center 272 print "Found %i points in radius %f around center (%.2f, %.2f, %.2f)." % (len(indices), query_radius, x, y, z) 273