1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17 import sys, random, sets
18 import Nodes
19
20 PRECISION_BRANCHLENGTH=6
21 PRECISION_SUPPORT=6
22
24
26 """Stores tree-relevant data associated with nodes (e.g. branches or otus)."""
27 - def __init__(self,taxon=None,branchlength=0.0,support=None):
28 self.taxon=taxon
29 self.branchlength=branchlength
30 self.support=support
31
32 -class Tree(Nodes.Chain):
33 """Represents a tree using a chain of nodes with on predecessor (=ancestor)
34 and multiple successors (=subclades).
35 """
36
37
38
39
40
41
42
43
44
46 """Ntree(self,tree)."""
47 Nodes.Chain.__init__(self)
48 self.dataclass=data
49 self.__values_are_support=values_are_support
50 self.max_support=max_support
51 self.weight=weight
52 self.rooted=rooted
53 self.name=name
54 root=Nodes.Node(data())
55 self.add(root)
56 self.root=root.id
57 if tree:
58
59 tree=tree.strip().replace('\n','').replace('\r','')
60
61 tree=tree.rstrip(';')
62 self._add_subtree(parent_id=root.id,tree=self._parse(tree)[0])
63
65 """Parses (a,b,c...)[[[xx]:]yy] into subcomponents and travels down recursively."""
66
67
68
69 tree = tree.strip()
70 if tree.count('(')!=tree.count(')'):
71 raise TreeError, 'Parentheses do not match in (sub)tree: '+tree
72 if tree.count('(')==0:
73 colon=tree.rfind(':')
74 if colon>-1:
75 return [tree[:colon],self._get_values(tree[colon+1:])]
76 else:
77 return [tree,[None]]
78 else:
79 closing=tree.rfind(')')
80 val=self._get_values(tree[closing+1:])
81 if not val:
82 val=[None]
83 subtrees=[]
84 plevel=0
85 prev=1
86 for p in range(1,closing):
87 if tree[p]=='(':
88 plevel+=1
89 elif tree[p]==')':
90 plevel-=1
91 elif tree[p]==',' and plevel==0:
92 subtrees.append(tree[prev:p])
93 prev=p+1
94 subtrees.append(tree[prev:closing])
95 subclades=[self._parse(subtree) for subtree in subtrees]
96 return [subclades,val]
97
99 """Adds leaf or tree (in newick format) to a parent_id. (self,parent_id,tree)."""
100
101 if parent_id is None:
102 raise TreeError('Need node_id to connect to.')
103 for st in tree:
104 if type(st[0])==list:
105 nd=self.dataclass()
106 if len(st[1])>=2:
107 nd.support=st[1][0]
108 if st[1][1] is not None:
109 nd.branchlength=st[1][1]
110 elif len(st[1])==1:
111 if not self.__values_are_support:
112 if st[1][0] is not None:
113 nd.branchlength=st[1][0]
114 else:
115 nd.support=st[1][0]
116 sn=Nodes.Node(nd)
117 self.add(sn,parent_id)
118 self._add_subtree(sn.id,st[0])
119 else:
120 nd=self.dataclass()
121 nd.taxon=st[0]
122 if len(st)>1:
123 if len(st[1])>=2:
124 nd.support=st[1][0]
125 if st[1][1] is not None:
126 nd.branchlength=st[1][1]
127 elif len(st[1])==1:
128 if not self.__values_are_support:
129 if st[1][0] is not None:
130 nd.branchlength=st[1][0]
131 else:
132 nd.support=st[1][0]
133 leaf=Nodes.Node(nd)
134 self.add(leaf,parent_id)
135
137 """Extracts values (support/branchlength) from xx[:yyy], xx."""
138
139 if text=='':
140 return None
141 return [float(t) for t in text.split(':') if t.strip()]
142
143 - def _walk(self,node=None):
144 """Return all node_ids downwards from a node."""
145
146 if node is None:
147 node=self.root
148 for n in self.node(node).succ:
149 yield n
150 for sn in self._walk(n):
151 yield sn
152
153 - def node(self,node_id):
154 """Return the instance of node_id.
155
156 node = node(self,node_id)
157 """
158 if node_id not in self.chain:
159 raise TreeError('Unknown node_id: %d' % node_id)
160 return self.chain[node_id]
161
162 - def split(self,parent_id=None,n=2,branchlength=1.0):
163 """Speciation: generates n (default two) descendants of a node.
164
165 [new ids] = split(self,parent_id=None,n=2,branchlength=1.0):
166 """
167 if parent_id is None:
168 raise TreeError('Missing node_id.')
169 ids=[]
170 parent_data=self.chain[parent_id].data
171 for i in range(n):
172 node=Nodes.Node()
173 if parent_data:
174 node.data=self.dataclass()
175
176 if parent_data.taxon:
177 node.data.taxon=parent_data.taxon+str(i)
178 node.data.branchlength=branchlength
179 ids.append(self.add(node,parent_id))
180 return ids
181
183 """Returns the first matching taxon in self.data.taxon. Not restricted to terminal nodes.
184
185 node_id = search_taxon(self,taxon)
186 """
187 for id,node in self.chain.items():
188 if node.data.taxon==taxon:
189 return id
190 return None
191
193 """Prunes a terminal taxon from the tree.
194
195 id_of_previous_node = prune(self,taxon)
196 If taxon is from a bifurcation, the connectiong node will be collapsed
197 and its branchlength added to remaining terminal node. This might be no
198 longer a meaningful value'
199 """
200
201 id=self.search_taxon(taxon)
202 if id is None:
203 raise TreeError('Taxon not found: %s' % taxon)
204 elif id not in self.get_terminals():
205 raise TreeError('Not a terminal taxon: %s' % taxon)
206 else:
207 prev=self.unlink(id)
208 self.kill(id)
209 if len(self.node(prev).succ)==1:
210 if prev==self.root:
211 self.root=self.node(self.root).succ[0]
212 self.node(self.root).branchlength=0.0
213 self.kill(prev)
214 else:
215 succ=self.node(prev).succ[0]
216 new_bl=self.node(prev).data.branchlength+self.node(succ).data.branchlength
217 self.collapse(prev)
218 self.node(succ).data.branchlength=new_bl
219 return prev
220
222 """Return a list of all otus downwards from a node (self, node_id).
223
224 nodes = get_taxa(self,node_id=None)
225 """
226
227 if node_id is None:
228 node_id=self.root
229 if node_id not in self.chain:
230 raise TreeError('Unknown node_id: %d.' % node_id)
231 if self.chain[node_id].succ==[]:
232 if self.chain[node_id].data:
233 return [self.chain[node_id].data.taxon]
234 else:
235 return None
236 else:
237 list=[]
238 for succ in self.chain[node_id].succ:
239 list.extend(self.get_taxa(succ))
240 return list
241
243 """Return a list of all terminal nodes."""
244 return [i for i in self.all_ids() if self.node(i).succ==[]]
245
247 """Adds up the branchlengths from root (default self.root) to node.
248
249 sum = sum_branchlength(self,root=None,node=None)
250 """
251
252 if root is None:
253 root=self.root
254 if node is None:
255 raise TreeError('Missing node id.')
256 blen=0.0
257 while node is not None and node is not root:
258 blen+=self.node(node).data.branchlength
259 node=self.node(node).prev
260 return blen
261
263 """Return subtree as a set of nested sets.
264
265 sets = set_subtree(self,node)
266 """
267
268 if self.node(node).succ==[]:
269 return self.node(node).data.taxon
270 else:
271 return sets.Set([self.set_subtree(n) for n in self.node(node).succ])
272
274 """Compare tree and tree2 for identity.
275
276 result = is_identical(self,tree2)
277 """
278 return self.set_subtree(self.root)==tree2.set_subtree(tree2.root)
279
281 """Compares branches with support>threshold for compatibility.
282
283 result = is_compatible(self,tree2,threshold)
284 """
285
286
287 missing2=sets.Set(self.get_taxa())-sets.Set(tree2.get_taxa())
288 missing1=sets.Set(tree2.get_taxa())-sets.Set(self.get_taxa())
289 if strict and (missing1 or missing2):
290 if missing1:
291 print 'Taxon/taxa %s is/are missing in tree %s' % (','.join(missing1) , self.name)
292 if missing2:
293 print 'Taxon/taxa %s is/are missing in tree %s' % (','.join(missing2) , tree2.name)
294 raise TreeError, 'Can\'t compare trees with different taxon compositions.'
295 t1=[(sets.Set(self.get_taxa(n)),self.node(n).data.support) for n in self.all_ids() if \
296 self.node(n).succ and\
297 (self.node(n).data and self.node(n).data.support and self.node(n).data.support>=threshold)]
298 t2=[(sets.Set(tree2.get_taxa(n)),tree2.node(n).data.support) for n in tree2.all_ids() if \
299 tree2.node(n).succ and\
300 (tree2.node(n).data and tree2.node(n).data.support and tree2.node(n).data.support>=threshold)]
301 conflict=[]
302 for (st1,sup1) in t1:
303 for (st2,sup2) in t2:
304 if not st1.issubset(st2) and not st2.issubset(st1):
305 intersect,notin1,notin2=st1 & st2, st2-st1, st1-st2
306
307 if intersect and not (notin1.issubset(missing1) or notin2.issubset(missing2)):
308 conflict.append((st1,sup1,st2,sup2,intersect,notin1,notin2))
309 return conflict
310
312 """Return the common ancestor that connects to nodes.
313
314 node_id = common_ancestor(self,node1,node2)
315 """
316
317 l1=[self.root]+self.trace(self.root,node1)
318 l2=[self.root]+self.trace(self.root,node2)
319 return [n for n in l1 if n in l2][-1]
320
321
323 """Add and return the sum of the branchlengths between two nodes.
324 dist = distance(self,node1,node2)
325 """
326
327 ca=self.common_ancestor(node1,node2)
328 return self.sum_branchlength(ca,node1)+self.sum_branchlength(ca,node2)
329
331 """Return node_id of common ancestor if taxon_list is monophyletic, -1 otherwise.
332
333 result = is_monophyletic(self,taxon_list)
334 """
335 if isinstance(taxon_list,str):
336 taxon_set=sets.Set([taxon_list])
337 else:
338 taxon_set=sets.Set(taxon_list)
339 node_id=self.root
340 while 1:
341 subclade_taxa=sets.Set(self.get_taxa(node_id))
342 if subclade_taxa==taxon_set:
343 return node_id
344 else:
345 for subnode in self.chain[node_id].succ:
346 if sets.Set(self.get_taxa(subnode)).issuperset(taxon_set):
347 node_id=subnode
348 break
349 else:
350 return -1
351
366
367
368
370 """Move values stored in data.branchlength to data.support, and set branchlength to 0.0
371
372 This is necessary when support has been stored as branchlength (e.g. paup), and has thus
373 been read in as branchlength.
374 """
375
376 for n in self.chain.keys():
377 self.node(n).data.support=self.node(n).data.branchlength
378 self.node(n).data.branchlength=0.0
379
381 """Convert absolute support (clade-count) to rel. frequencies.
382
383 Some software (e.g. PHYLIP consense) just calculate how often clades appear, instead of
384 calculating relative frequencies."""
385
386 for n in self._walk():
387 if self.node(n).data.support:
388 self.node(n).data.support/=float(nrep)
389
390 - def randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True):
391 """Generates a random tree with ntax taxa and/or taxa from taxlabels.
392
393 new_tree = randomize(self,ntax=None,taxon_list=None,branchlength=1.0,branchlength_sd=None,bifurcate=True)
394 Trees are bifurcating by default. (Polytomies not yet supported).
395 """
396
397 if not ntax and taxon_list:
398 ntax=len(taxon_list)
399 elif not taxon_list and ntax:
400 taxon_list=['taxon'+str(i+1) for i in range(ntax)]
401 elif not ntax and not taxon_list:
402 raise TreeError('Either numer of taxa or list of taxa must be specified.')
403 elif ntax<>len(taxon_list):
404 raise TreeError('Length of taxon list must correspond to ntax.')
405
406 self.__init__()
407 terminals=self.get_terminals()
408
409 while len(terminals)<ntax:
410 newsplit=random.choice(terminals)
411 new_terminals=self.split(parent_id=newsplit,branchlength=branchlength)
412
413 if branchlength_sd:
414 for nt in new_terminals:
415 bl=random.gauss(branchlength,branchlength_sd)
416 if bl<0:
417 bl=0
418 self.node(nt).data.branchlength=bl
419 terminals.extend(new_terminals)
420 terminals.remove(newsplit)
421
422 random.shuffle(taxon_list)
423 for (node,name) in zip(terminals,taxon_list):
424 self.node(node).data.taxon=name
425
427 """Quick and dirty lists of all nodes."""
428 table=[('#','taxon','prev','succ','brlen','blen (sum)','support')]
429 for i in self.all_ids():
430 n=self.node(i)
431 if not n.data:
432 table.append((str(i),'-',str(n.prev),str(n.succ),'-','-','-'))
433 else:
434 tx=n.data.taxon
435 if not tx:
436 tx='-'
437 blength=n.data.branchlength
438 if blength is None:
439 blength='-'
440 sum_blength='-'
441 else:
442 sum_blength=self.sum_branchlength(node=i)
443 support=n.data.support
444 if support is None:
445 support='-'
446 table.append((str(i),tx,str(n.prev),str(n.succ),blength,sum_blength,support))
447 print '\n'.join(['%3s %32s %15s %15s %8s %10s %8s' % l for l in table])
448 print '\nRoot: ',self.root
449
451 """Return a paup compatible tree line.
452
453 to_string(self,support_as_branchlengths=False,branchlengths_only=False,plain=True)
454 """
455
456 if support_as_branchlengths or branchlengths_only:
457 plain=False
458 self.support_as_branchlengths=support_as_branchlengths
459 self.branchlengths_only=branchlengths_only
460 self.plain=plain
461
462 def make_info_string(data,terminal=False):
463 """Creates nicely formatted support/branchlengths."""
464
465 if self.plain:
466 return ''
467 elif self.support_as_branchlengths:
468 if terminal:
469 return ':%1.2f' % self.max_support
470 else:
471 return ':%1.2f' % (data.support)
472 elif self.branchlengths_only:
473 return ':%1.5f' % (data.branchlength)
474 else:
475 if terminal:
476 return ':%1.5f' % (data.branchlength)
477 else:
478 if data.branchlength is not None and data.support is not None:
479 return '%1.2f:%1.5f' % (data.support,data.branchlength)
480 elif data.branchlength is not None:
481 return '0.00000:%1.5f' % (data.branchlength)
482 elif data.support is not None:
483 return '%1.2f:0.00000' % (data.support)
484 else:
485 return '0.00:0.00000'
486
487 def newickize(node):
488 """Convert a node tree to a newick tree recursively."""
489
490 if not self.node(node).succ:
491 return self.node(node).data.taxon+make_info_string(self.node(node).data,terminal=True)
492 else:
493 return '(%s)%s' % (','.join(map(newickize,self.node(node).succ)),make_info_string(self.node(node).data))
494 return subtree
495
496 treeline=['tree']
497 if self.name:
498 treeline.append(self.name)
499 else:
500 treeline.append('a_tree')
501 treeline.append('=')
502 if self.weight<>1:
503 treeline.append('[&W%s]' % str(round(float(self.weight),3)))
504 if self.rooted:
505 treeline.append('[&R]')
506 treeline.append('(%s)' % ','.join(map(newickize,self.node(self.root).succ)))
507 if plain_newick:
508 return treeline[-1]
509 else:
510 return ' '.join(treeline)+';'
511
513 """Short version of to_string(), gives plain tree"""
514 return self.to_string(plain=True)
515
517 """Defines a unrooted Tree structure, using data of a rooted Tree."""
518
519
520
521 def _get_branches(node):
522 branches=[]
523 for b in self.node(node).succ:
524 branches.append([node,b,self.node(b).data.branchlength,self.node(b).data.support])
525 branches.extend(_get_branches(b))
526 return branches
527
528 self.unrooted=_get_branches(self.root)
529
530 if len(self.node(self.root).succ)==2:
531
532 rootbranches=[b for b in self.unrooted if self.root in b[:2]]
533 b1=self.unrooted.pop(self.unrooted.index(rootbranches[0]))
534 b2=self.unrooted.pop(self.unrooted.index(rootbranches[1]))
535
536
537 newbranch=[b1[1],b2[1],b1[2]+b2[2]]
538 if b1[3] is None:
539 newbranch.append(b2[3])
540 elif b2[3] is None:
541 newbranch.append(b1[3])
542 elif b1[3]==b2[3]:
543 newbranch.append(b1[3])
544 elif b1[3]==0 or b2[3]==0:
545 newbranch.append(b1[3]+b2[3])
546 else:
547 raise TreeError, 'Support mismatch in bifurcating root: %f, %f' % (float(b1[3]),float(b2[3]))
548 self.unrooted.append(newbranch)
549
551
552 def _connect_subtree(parent,child):
553 """Hook subtree starting with node child to parent."""
554 for i,branch in enumerate(self.unrooted):
555 if parent in branch[:2] and child in branch[:2]:
556 branch=self.unrooted.pop(i)
557 break
558 else:
559 raise TreeError, 'Unable to connect nodes for rooting: nodes %d and %d are not connected' % (parent,child)
560 self.link(parent,child)
561 self.node(child).data.branchlength=branch[2]
562 self.node(child).data.support=branch[3]
563
564 child_branches=[b for b in self.unrooted if child in b[:2]]
565 for b in child_branches:
566 if child==b[0]:
567 succ=b[1]
568 else:
569 succ=b[0]
570 _connect_subtree(child,succ)
571
572
573 if outgroup is None:
574 return self.root
575 outgroup_node=self.is_monophyletic(outgroup)
576 if outgroup_node==-1:
577 return -1
578
579
580 if (len(self.node(self.root).succ)==2 and outgroup_node in self.node(self.root).succ) or outgroup_node==self.root:
581 return self.root
582
583 self.unroot()
584
585
586 for i,b in enumerate(self.unrooted):
587 if outgroup_node in b[:2] and self.node(outgroup_node).prev in b[:2]:
588 root_branch=self.unrooted.pop(i)
589 break
590 else:
591 raise TreeError, 'Unrooted and rooted Tree do not match'
592 if outgroup_node==root_branch[1]:
593 ingroup_node=root_branch[0]
594 else:
595 ingroup_node=root_branch[1]
596
597 for n in self.all_ids():
598 self.node(n).prev=None
599 self.node(n).succ=[]
600
601 root=Nodes.Node(data=NodeData())
602 self.add(root)
603 self.root=root.id
604 self.unrooted.append([root.id,ingroup_node,root_branch[2],root_branch[3]])
605 self.unrooted.append([root.id,outgroup_node,0.0,0.0])
606 _connect_subtree(root.id,ingroup_node)
607 _connect_subtree(root.id,outgroup_node)
608
609 oldroot=[i for i in self.all_ids() if self.node(i).prev is None and i!=self.root]
610 if len(oldroot)>1:
611 raise TreeError, 'Isolated nodes in tree description: %s' % ','.join(oldroot)
612 elif len(oldroot)==1:
613 self.kill(oldroot[0])
614 return self.root
615
616
617 -def consensus(trees, threshold=0.5,outgroup=None):
692