Package Bio :: Package Nexus :: Module Trees
[hide private]
[frames] | no frames]

Source Code for Module Bio.Nexus.Trees

  1   
  2  # 
  3  # Trees.py  
  4  # 
  5  # Copyright 2005-2008 by Frank Kauff & Cymon J. Cox. All rights reserved. 
  6  # This code is part of the Biopython distribution and governed by its 
  7  # license. Please see the LICENSE file that should have been included 
  8  # as part of this package. 
  9  # 
 10  # Tree class handles phylogenetic trees. Provides a set of methods to read and write newick-format tree 
 11  # descriptions, get information about trees (monphyly of taxon sets, congruence between trees, common ancestors,...) 
 12  # and to manipulate trees (reroot trees, split terminal nodes). 
 13  # 
 14  # Bug reports welcome: fkauff@biologie.uni-kl.de 
 15  # 
 16   
 17  import sys, random, sets 
 18  import Nodes 
 19   
 20  PRECISION_BRANCHLENGTH=6 
 21  PRECISION_SUPPORT=6 
 22   
23 -class TreeError(Exception): pass
24
25 -class NodeData:
26 """Stores tree-relevant data associated with nodes (e.g. branches or otus)."""
27 - def __init__(self,taxon=None,branchlength=0.0,support=None):
28 self.taxon=taxon 29 self.branchlength=branchlength 30 self.support=support
31
32 -class Tree(Nodes.Chain):
33 """Represents a tree using a chain of nodes with on predecessor (=ancestor) 34 and multiple successors (=subclades). 35 """ 36 # A newick tree is parsed into nested list and then converted to a node list in two stages 37 # mostly due to historical reasons. This could be done in one swoop). Note: parentheses ( ) and 38 # colon : are not allowed in taxon names. This is against NEXUS standard, but makes life much 39 # easier when parsing trees. 40 41 ## NOTE: Tree should store its data class in something like self.dataclass=data, 42 ## so that nodes that are generated have easy access to the data class 43 ## Some routines use automatically NodeData, this needs to be more concise 44
45 - def __init__(self,tree=None,weight=1.0,rooted=False,name='',data=NodeData,values_are_support=False,max_support=1.0):
46 """Ntree(self,tree).""" 47 Nodes.Chain.__init__(self) 48 self.dataclass=data 49 self.__values_are_support=values_are_support 50 self.max_support=max_support 51 self.weight=weight 52 self.rooted=rooted 53 self.name=name 54 root=Nodes.Node(data()) 55 self.add(root) 56 self.root=root.id 57 if tree: # use the tree we have 58 # if Tree is called from outside Nexus parser, we need to get rid of linebreaks, etc 59 tree=tree.strip().replace('\n','').replace('\r','') 60 # there's discrepancy whether newick allows semicolons et the end 61 tree=tree.rstrip(';') 62 self._add_subtree(parent_id=root.id,tree=self._parse(tree)[0])
63
64 - def _parse(self,tree):
65 """Parses (a,b,c...)[[[xx]:]yy] into subcomponents and travels down recursively.""" 66 67 #Remove any leading/trailing white space - want any string starting 68 #with " (..." should be recognised as a leaf, "(..." 69 tree = tree.strip() 70 if tree.count('(')!=tree.count(')'): 71 raise TreeError, 'Parentheses do not match in (sub)tree: '+tree 72 if tree.count('(')==0: # a leaf 73 colon=tree.rfind(':') 74 if colon>-1: 75 return [tree[:colon],self._get_values(tree[colon+1:])] 76 else: 77 return [tree,[None]] 78 else: 79 closing=tree.rfind(')') 80 val=self._get_values(tree[closing+1:]) 81 if not val: 82 val=[None] 83 subtrees=[] 84 plevel=0 85 prev=1 86 for p in range(1,closing): 87 if tree[p]=='(': 88 plevel+=1 89 elif tree[p]==')': 90 plevel-=1 91 elif tree[p]==',' and plevel==0: 92 subtrees.append(tree[prev:p]) 93 prev=p+1 94 subtrees.append(tree[prev:closing]) 95 subclades=[self._parse(subtree) for subtree in subtrees] 96 return [subclades,val]
97
98 - def _add_subtree(self,parent_id=None,tree=None):
99 """Adds leaf or tree (in newick format) to a parent_id. (self,parent_id,tree).""" 100 101 if parent_id is None: 102 raise TreeError('Need node_id to connect to.') 103 for st in tree: 104 if type(st[0])==list: # it's a subtree 105 nd=self.dataclass() 106 if len(st[1])>=2: # if there's two values, support comes first. Is that always so? 107 nd.support=st[1][0] 108 if st[1][1] is not None: 109 nd.branchlength=st[1][1] 110 elif len(st[1])==1: # otherwise it could be real branchlengths or support as branchlengths 111 if not self.__values_are_support: # default 112 if st[1][0] is not None: 113 nd.branchlength=st[1][0] 114 else: 115 nd.support=st[1][0] 116 sn=Nodes.Node(nd) 117 self.add(sn,parent_id) 118 self._add_subtree(sn.id,st[0]) 119 else: # it's a leaf 120 nd=self.dataclass() 121 nd.taxon=st[0] 122 if len(st)>1: 123 if len(st[1])>=2: # if there's two values, support comes first. Is that always so? 124 nd.support=st[1][0] 125 if st[1][1] is not None: 126 nd.branchlength=st[1][1] 127 elif len(st[1])==1: # otherwise it could be real branchlengths or support as branchlengths 128 if not self.__values_are_support: # default 129 if st[1][0] is not None: 130 nd.branchlength=st[1][0] 131 else: 132 nd.support=st[1][0] 133 leaf=Nodes.Node(nd) 134 self.add(leaf,parent_id)
135
136 - def _get_values(self, text):
137 """Extracts values (support/branchlength) from xx[:yyy], xx.""" 138 139 if text=='': 140 return None 141 return [float(t) for t in text.split(':') if t.strip()]
142
143 - def _walk(self,node=None):
144 """Return all node_ids downwards from a node.""" 145 146 if node is None: 147 node=self.root 148 for n in self.node(node).succ: 149 yield n 150 for sn in self._walk(n): 151 yield sn
152
153 - def node(self,node_id):
154 """Return the instance of node_id. 155 156 node = node(self,node_id) 157 """ 158 if node_id not in self.chain: 159 raise TreeError('Unknown node_id: %d' % node_id) 160 return self.chain[node_id]
161
162 - def split(self,parent_id=None,n=2,branchlength=1.0):
163 """Speciation: generates n (default two) descendants of a node. 164 165 [new ids] = split(self,parent_id=None,n=2,branchlength=1.0): 166 """ 167 if parent_id is None: 168 raise TreeError('Missing node_id.') 169 ids=[] 170 parent_data=self.chain[parent_id].data 171 for i in range(n): 172 node=Nodes.Node() 173 if parent_data: 174 node.data=self.dataclass() 175 # each node has taxon and branchlength attribute 176 if parent_data.taxon: 177 node.data.taxon=parent_data.taxon+str(i) 178 node.data.branchlength=branchlength 179 ids.append(self.add(node,parent_id)) 180 return ids
181
182 - def search_taxon(self,taxon):
183 """Returns the first matching taxon in self.data.taxon. Not restricted to terminal nodes. 184 185 node_id = search_taxon(self,taxon) 186 """ 187 for id,node in self.chain.items(): 188 if node.data.taxon==taxon: 189 return id 190 return None
191
192 - def prune(self,taxon):
193 """Prunes a terminal taxon from the tree. 194 195 id_of_previous_node = prune(self,taxon) 196 If taxon is from a bifurcation, the connectiong node will be collapsed 197 and its branchlength added to remaining terminal node. This might be no 198 longer a meaningful value' 199 """ 200 201 id=self.search_taxon(taxon) 202 if id is None: 203 raise TreeError('Taxon not found: %s' % taxon) 204 elif id not in self.get_terminals(): 205 raise TreeError('Not a terminal taxon: %s' % taxon) 206 else: 207 prev=self.unlink(id) 208 self.kill(id) 209 if len(self.node(prev).succ)==1: 210 if prev==self.root: # we deleted one branch of a bifurcating root, then we have to move the root upwards 211 self.root=self.node(self.root).succ[0] 212 self.node(self.root).branchlength=0.0 213 self.kill(prev) 214 else: 215 succ=self.node(prev).succ[0] 216 new_bl=self.node(prev).data.branchlength+self.node(succ).data.branchlength 217 self.collapse(prev) 218 self.node(succ).data.branchlength=new_bl 219 return prev
220
221 - def get_taxa(self,node_id=None):
222 """Return a list of all otus downwards from a node (self, node_id). 223 224 nodes = get_taxa(self,node_id=None) 225 """ 226 227 if node_id is None: 228 node_id=self.root 229 if node_id not in self.chain: 230 raise TreeError('Unknown node_id: %d.' % node_id) 231 if self.chain[node_id].succ==[]: 232 if self.chain[node_id].data: 233 return [self.chain[node_id].data.taxon] 234 else: 235 return None 236 else: 237 list=[] 238 for succ in self.chain[node_id].succ: 239 list.extend(self.get_taxa(succ)) 240 return list
241
242 - def get_terminals(self):
243 """Return a list of all terminal nodes.""" 244 return [i for i in self.all_ids() if self.node(i).succ==[]]
245
246 - def sum_branchlength(self,root=None,node=None):
247 """Adds up the branchlengths from root (default self.root) to node. 248 249 sum = sum_branchlength(self,root=None,node=None) 250 """ 251 252 if root is None: 253 root=self.root 254 if node is None: 255 raise TreeError('Missing node id.') 256 blen=0.0 257 while node is not None and node is not root: 258 blen+=self.node(node).data.branchlength 259 node=self.node(node).prev 260 return blen
261
262 - def set_subtree(self,node):
263 """Return subtree as a set of nested sets. 264 265 sets = set_subtree(self,node) 266 """ 267 268 if self.node(node).succ==[]: 269 return self.node(node).data.taxon 270 else: 271 return sets.Set([self.set_subtree(n) for n in self.node(node).succ])
272
273 - def is_identical(self,tree2):
274 """Compare tree and tree2 for identity. 275 276 result = is_identical(self,tree2) 277 """ 278 return self.set_subtree(self.root)==tree2.set_subtree(tree2.root)
279
280 - def is_compatible(self,tree2,threshold,strict=True):
281 """Compares branches with support>threshold for compatibility. 282 283 result = is_compatible(self,tree2,threshold) 284 """ 285 286 # check if both trees have the same set of taxa. strict=True enforces this. 287 missing2=sets.Set(self.get_taxa())-sets.Set(tree2.get_taxa()) 288 missing1=sets.Set(tree2.get_taxa())-sets.Set(self.get_taxa()) 289 if strict and (missing1 or missing2): 290 if missing1: 291 print 'Taxon/taxa %s is/are missing in tree %s' % (','.join(missing1) , self.name) 292 if missing2: 293 print 'Taxon/taxa %s is/are missing in tree %s' % (','.join(missing2) , tree2.name) 294 raise TreeError, 'Can\'t compare trees with different taxon compositions.' 295 t1=[(sets.Set(self.get_taxa(n)),self.node(n).data.support) for n in self.all_ids() if \ 296 self.node(n).succ and\ 297 (self.node(n).data and self.node(n).data.support and self.node(n).data.support>=threshold)] 298 t2=[(sets.Set(tree2.get_taxa(n)),tree2.node(n).data.support) for n in tree2.all_ids() if \ 299 tree2.node(n).succ and\ 300 (tree2.node(n).data and tree2.node(n).data.support and tree2.node(n).data.support>=threshold)] 301 conflict=[] 302 for (st1,sup1) in t1: 303 for (st2,sup2) in t2: 304 if not st1.issubset(st2) and not st2.issubset(st1): # don't hiccup on upstream nodes 305 intersect,notin1,notin2=st1 & st2, st2-st1, st1-st2 # all three are non-empty sets 306 # if notin1==missing1 or notin2==missing2 <==> st1.issubset(st2) or st2.issubset(st1) ??? 307 if intersect and not (notin1.issubset(missing1) or notin2.issubset(missing2)): # omit conflicts due to missing taxa 308 conflict.append((st1,sup1,st2,sup2,intersect,notin1,notin2)) 309 return conflict
310
311 - def common_ancestor(self,node1,node2):
312 """Return the common ancestor that connects to nodes. 313 314 node_id = common_ancestor(self,node1,node2) 315 """ 316 317 l1=[self.root]+self.trace(self.root,node1) 318 l2=[self.root]+self.trace(self.root,node2) 319 return [n for n in l1 if n in l2][-1]
320 321
322 - def distance(self,node1,node2):
323 """Add and return the sum of the branchlengths between two nodes. 324 dist = distance(self,node1,node2) 325 """ 326 327 ca=self.common_ancestor(node1,node2) 328 return self.sum_branchlength(ca,node1)+self.sum_branchlength(ca,node2)
329
330 - def is_monophyletic(self,taxon_list):
331 """Return node_id of common ancestor if taxon_list is monophyletic, -1 otherwise. 332 333 result = is_monophyletic(self,taxon_list) 334 """ 335 if isinstance(taxon_list,str): 336 taxon_set=sets.Set([taxon_list]) 337 else: 338 taxon_set=sets.Set(taxon_list) 339 node_id=self.root 340 while 1: 341 subclade_taxa=sets.Set(self.get_taxa(node_id)) 342 if subclade_taxa==taxon_set: # are we there? 343 return node_id 344 else: # check subnodes 345 for subnode in self.chain[node_id].succ: 346 if sets.Set(self.get_taxa(subnode)).issuperset(taxon_set): # taxon_set is downstream 347 node_id=subnode 348 break # out of for loop 349 else: 350 return -1 # taxon set was not with successors, for loop exhausted
351
352 - def is_bifurcating(self,node=None):
353 """Return True if tree downstream of node is strictly bifurcating.""" 354 if not node: 355 node=self.root 356 if node==self.root and len(self.node(node).succ)==3: #root can be trifurcating, because it has no ancestor 357 return self.is_bifurcating(self.node(node).succ[0]) and \ 358 self.is_bifurcating(self.node(node).succ[1]) and \ 359 self.is_bifurcating(self.node(node).succ[2]) 360 if len(self.node(node).succ)==2: 361 return self.is_bifurcating(self.node(node).succ[0]) and self.is_bifurcating(self.node(node).succ[1]) 362 elif len(self.node(node).succ)==0: 363 return True 364 else: 365 return False
366 367 368
369 - def branchlength2support(self):
370 """Move values stored in data.branchlength to data.support, and set branchlength to 0.0 371 372 This is necessary when support has been stored as branchlength (e.g. paup), and has thus 373 been read in as branchlength. 374 """ 375 376 for n in self.chain.keys(): 377 self.node(n).data.support=self.node(n).data.branchlength 378 self.node(n).data.branchlength=0.0
379
380 - def convert_absolute_support(self,nrep):
381 """Convert absolute support (clade-count) to rel. frequencies. 382 383 Some software (e.g. PHYLIP consense) just calculate how often clades appear, instead of 384 calculating relative frequencies.""" 385 386 for n in self._walk(): 387 if self.node(n).data.support: 388 self.node(n).data.support/=float(nrep)
389
390 - def randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True):
391 """Generates a random tree with ntax taxa and/or taxa from taxlabels. 392 393 new_tree = randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True) 394 Trees are bifurcating by default. (Polytomies not yet supported). 395 """ 396 397 if not ntax and taxon_list: 398 ntax=len(taxon_list) 399 elif not taxon_list and ntax: 400 taxon_list=['taxon'+str(i+1) for i in range(ntax)] 401 elif not ntax and not taxon_list: 402 raise TreeError('Either numer of taxa or list of taxa must be specified.') 403 elif ntax<>len(taxon_list): 404 raise TreeError('Length of taxon list must correspond to ntax.') 405 # initiate self with empty root 406 self.__init__() 407 terminals=self.get_terminals() 408 # bifurcate randomly at terminal nodes until ntax is reached 409 while len(terminals)<ntax: 410 newsplit=random.choice(terminals) 411 new_terminals=self.split(parent_id=newsplit,branchlength=branchlength) 412 # if desired, give some variation to the branch length 413 if branchlength_sd: 414 for nt in new_terminals: 415 bl=random.gauss(branchlength,branchlength_sd) 416 if bl<0: 417 bl=0 418 self.node(nt).data.branchlength=bl 419 terminals.extend(new_terminals) 420 terminals.remove(newsplit) 421 # distribute taxon labels randomly 422 random.shuffle(taxon_list) 423 for (node,name) in zip(terminals,taxon_list): 424 self.node(node).data.taxon=name
425
426 - def display(self):
427 """Quick and dirty lists of all nodes.""" 428 table=[('#','taxon','prev','succ','brlen','blen (sum)','support')] 429 for i in self.all_ids(): 430 n=self.node(i) 431 if not n.data: 432 table.append((str(i),'-',str(n.prev),str(n.succ),'-','-','-')) 433 else: 434 tx=n.data.taxon 435 if not tx: 436 tx='-' 437 blength=n.data.branchlength 438 if blength is None: 439 blength='-' 440 sum_blength='-' 441 else: 442 sum_blength=self.sum_branchlength(node=i) 443 support=n.data.support 444 if support is None: 445 support='-' 446 table.append((str(i),tx,str(n.prev),str(n.succ),blength,sum_blength,support)) 447 print '\n'.join(['%3s %32s %15s %15s %8s %10s %8s' % l for l in table]) 448 print '\nRoot: ',self.root
449
450 - def to_string(self,support_as_branchlengths=False,branchlengths_only=False,plain=True,plain_newick=False):
451 """Return a paup compatible tree line. 452 453 to_string(self,support_as_branchlengths=False,branchlengths_only=False,plain=True) 454 """ 455 # if there's a conflict in the arguments, we override plain=True 456 if support_as_branchlengths or branchlengths_only: 457 plain=False 458 self.support_as_branchlengths=support_as_branchlengths 459 self.branchlengths_only=branchlengths_only 460 self.plain=plain 461 462 def make_info_string(data,terminal=False): 463 """Creates nicely formatted support/branchlengths.""" 464 # CHECK FORMATTING 465 if self.plain: # plain tree only. That's easy. 466 return '' 467 elif self.support_as_branchlengths: # support as branchlengths (eg. PAUP), ignore actual branchlengths 468 if terminal: # terminal branches have 100% support 469 return ':%1.2f' % self.max_support 470 else: 471 return ':%1.2f' % (data.support) 472 elif self.branchlengths_only: # write only branchlengths, ignore support 473 return ':%1.5f' % (data.branchlength) 474 else: # write suport and branchlengths (e.g. .con tree of mrbayes) 475 if terminal: 476 return ':%1.5f' % (data.branchlength) 477 else: 478 if data.branchlength is not None and data.support is not None: # we have blen and suppport 479 return '%1.2f:%1.5f' % (data.support,data.branchlength) 480 elif data.branchlength is not None: # we have only blen 481 return '0.00000:%1.5f' % (data.branchlength) 482 elif data.support is not None: # we have only support 483 return '%1.2f:0.00000' % (data.support) 484 else: 485 return '0.00:0.00000'
486 487 def newickize(node): 488 """Convert a node tree to a newick tree recursively.""" 489 490 if not self.node(node).succ: #terminal 491 return self.node(node).data.taxon+make_info_string(self.node(node).data,terminal=True) 492 else: 493 return '(%s)%s' % (','.join(map(newickize,self.node(node).succ)),make_info_string(self.node(node).data)) 494 return subtree
495 496 treeline=['tree'] 497 if self.name: 498 treeline.append(self.name) 499 else: 500 treeline.append('a_tree') 501 treeline.append('=') 502 if self.weight<>1: 503 treeline.append('[&W%s]' % str(round(float(self.weight),3))) 504 if self.rooted: 505 treeline.append('[&R]') 506 treeline.append('(%s)' % ','.join(map(newickize,self.node(self.root).succ))) 507 if plain_newick: 508 return treeline[-1] 509 else: 510 return ' '.join(treeline)+';' 511
512 - def __str__(self):
513 """Short version of to_string(), gives plain tree""" 514 return self.to_string(plain=True)
515
516 - def unroot(self):
517 """Defines a unrooted Tree structure, using data of a rooted Tree.""" 518 519 # travel down the rooted tree structure and save all branches and the nodes they connect 520 521 def _get_branches(node): 522 branches=[] 523 for b in self.node(node).succ: 524 branches.append([node,b,self.node(b).data.branchlength,self.node(b).data.support]) 525 branches.extend(_get_branches(b)) 526 return branches
527 528 self.unrooted=_get_branches(self.root) 529 # if root is bifurcating, then it is eliminated 530 if len(self.node(self.root).succ)==2: 531 # find the two branches that connect to root 532 rootbranches=[b for b in self.unrooted if self.root in b[:2]] 533 b1=self.unrooted.pop(self.unrooted.index(rootbranches[0])) 534 b2=self.unrooted.pop(self.unrooted.index(rootbranches[1])) 535 # Connect them two each other. If both have support, it should be identical (or one set to None?). 536 # If both have branchlengths, they will be added 537 newbranch=[b1[1],b2[1],b1[2]+b2[2]] 538 if b1[3] is None: 539 newbranch.append(b2[3]) # either None (both rootbranches are unsupported) or some support 540 elif b2[3] is None: 541 newbranch.append(b1[3]) # dito 542 elif b1[3]==b2[3]: 543 newbranch.append(b1[3]) # identical support 544 elif b1[3]==0 or b2[3]==0: 545 newbranch.append(b1[3]+b2[3]) # one is 0, take the other 546 else: 547 raise TreeError, 'Support mismatch in bifurcating root: %f, %f' % (float(b1[3]),float(b2[3])) 548 self.unrooted.append(newbranch) 549
550 - def root_with_outgroup(self,outgroup=None):
551 552 def _connect_subtree(parent,child): 553 """Hook subtree starting with node child to parent.""" 554 for i,branch in enumerate(self.unrooted): 555 if parent in branch[:2] and child in branch[:2]: 556 branch=self.unrooted.pop(i) 557 break 558 else: 559 raise TreeError, 'Unable to connect nodes for rooting: nodes %d and %d are not connected' % (parent,child) 560 self.link(parent,child) 561 self.node(child).data.branchlength=branch[2] 562 self.node(child).data.support=branch[3] 563 #now check if there are more branches connected to the child, and if so, connect them 564 child_branches=[b for b in self.unrooted if child in b[:2]] 565 for b in child_branches: 566 if child==b[0]: 567 succ=b[1] 568 else: 569 succ=b[0] 570 _connect_subtree(child,succ)
571 572 # check the outgroup we're supposed to root with 573 if outgroup is None: 574 return self.root 575 outgroup_node=self.is_monophyletic(outgroup) 576 if outgroup_node==-1: 577 return -1 578 # if tree is already rooted with outgroup on a bifurcating root, 579 # or the outgroup includes all taxa on the tree, then we're fine 580 if (len(self.node(self.root).succ)==2 and outgroup_node in self.node(self.root).succ) or outgroup_node==self.root: 581 return self.root 582 583 self.unroot() 584 # now we find the branch that connects outgroup and ingroup 585 #print self.node(outgroup_node).prev 586 for i,b in enumerate(self.unrooted): 587 if outgroup_node in b[:2] and self.node(outgroup_node).prev in b[:2]: 588 root_branch=self.unrooted.pop(i) 589 break 590 else: 591 raise TreeError, 'Unrooted and rooted Tree do not match' 592 if outgroup_node==root_branch[1]: 593 ingroup_node=root_branch[0] 594 else: 595 ingroup_node=root_branch[1] 596 # now we destroy the old tree structure, but keep node data. Nodes will be reconnected according to new outgroup 597 for n in self.all_ids(): 598 self.node(n).prev=None 599 self.node(n).succ=[] 600 # now we just add both subtrees (outgroup and ingroup) branch for branch 601 root=Nodes.Node(data=NodeData()) # new root 602 self.add(root) # add to tree description 603 self.root=root.id # set as root 604 self.unrooted.append([root.id,ingroup_node,root_branch[2],root_branch[3]]) # add branch to ingroup to unrooted tree 605 self.unrooted.append([root.id,outgroup_node,0.0,0.0]) # add branch to outgroup to unrooted tree 606 _connect_subtree(root.id,ingroup_node) # add ingroup 607 _connect_subtree(root.id,outgroup_node) # add outgroup 608 # if theres still a lonely node in self.chain, then it's the old root, and we delete it 609 oldroot=[i for i in self.all_ids() if self.node(i).prev is None and i!=self.root] 610 if len(oldroot)>1: 611 raise TreeError, 'Isolated nodes in tree description: %s' % ','.join(oldroot) 612 elif len(oldroot)==1: 613 self.kill(oldroot[0]) 614 return self.root 615 616
617 -def consensus(trees, threshold=0.5,outgroup=None):
618 """Compute a majority rule consensus tree of all clades with relative frequency>=threshold from a list of trees.""" 619 620 total=len(trees) 621 622 if total==0: 623 return None 624 # shouldn't we make sure that it's NodeData or subclass?? 625 dataclass=trees[0].dataclass 626 max_support=trees[0].max_support 627 clades={} 628 #countclades={} 629 alltaxa=sets.Set(trees[0].get_taxa()) 630 # calculate calde frequencies 631 c=0 632 for t in trees: 633 c+=1 634 #print c 635 #if c%1==0: 636 # print c 637 if alltaxa!=sets.Set(t.get_taxa()): 638 raise TreeError, 'Trees for consensus must contain the same taxa' 639 t.root_with_outgroup(outgroup=outgroup) 640 for st_node in t._walk(t.root): 641 subclade_taxa=t.get_taxa(st_node) 642 subclade_taxa.sort() 643 subclade_taxa=str(subclade_taxa) # lists are not hashable 644 if subclade_taxa in clades: 645 clades[subclade_taxa]+=float(t.weight)/total 646 else: 647 clades[subclade_taxa]=float(t.weight)/total 648 #if subclade_taxa in countclades: 649 # countclades[subclade_taxa]+=t.weight 650 #else: 651 # countclades[subclade_taxa]=t.weight 652 # weed out clades below threshold 653 for (c,p) in clades.items(): 654 if p<threshold: 655 del clades[c] 656 # create a tree with a root node 657 consensus=Tree(name='consensus_%2.1f' % float(threshold),data=dataclass) 658 # each clade needs a node in the new tree, add them as isolated nodes 659 for (c,s) in clades.items(): 660 node=Nodes.Node(data=dataclass()) 661 node.data.support=s 662 node.data.taxon=sets.Set(eval(c)) 663 consensus.add(node) 664 # set root node data 665 consensus.node(consensus.root).data.support=None 666 consensus.node(consensus.root).data.taxon=alltaxa 667 # we sort the nodes by no. of taxa in the clade, so root will be the last 668 consensus_ids=consensus.all_ids() 669 consensus_ids.sort(lambda x,y:len(consensus.node(x).data.taxon)-len(consensus.node(y).data.taxon)) 670 # now we just have to hook each node to the next smallest node that includes all taxa of the current 671 for i,current in enumerate(consensus_ids[:-1]): # skip the last one which is the root 672 #print '----' 673 #print 'current: ',consensus.node(current).data.taxon 674 # search remaining nodes 675 for parent in consensus_ids[i+1:]: 676 #print 'parent: ',consensus.node(parent).data.taxon 677 if consensus.node(parent).data.taxon.issuperset(consensus.node(current).data.taxon): 678 break 679 else: 680 sys.exit('corrupt tree structure?') 681 # internal nodes don't have taxa 682 if len(consensus.node(current).data.taxon)==1: 683 consensus.node(current).data.taxon=consensus.node(current).data.taxon.pop() 684 # reset the support for terminal nodes to maximum 685 #consensus.node(current).data.support=max_support 686 else: 687 consensus.node(current).data.taxon=None 688 consensus.link(parent,current) 689 # eliminate root taxon name 690 consensus.node(consensus_ids[-1]).data.taxon=None 691 return consensus
692