1
2
3
4
5 """Command line wrapper for the multiple alignment program Clustal W.
6
7 http://www.clustal.org/
8
9 Citation:
10
11 Larkin MA, Blackshields G, Brown NP, Chenna R, McGettigan PA, McWilliam H,
12 Valentin F, Wallace IM, Wilm A, Lopez R, Thompson JD, Gibson TJ, Higgins DG.
13 (2007). Clustal W and Clustal X version 2.0. Bioinformatics, 23, 2947-2948.
14
15 Last checked against versions: 1.83 and 2.0.10
16 """
17 import os
18 import types
19 from Bio.Application import _Option, _Switch, AbstractCommandline
20
22 """Command line wrapper for clustalw (version one or two)."""
23
def __init__(self, cmd="clustalw", **kwargs):
    """Declare the clustalw command line parameters, then initialise.

    Fills ``self.parameters`` with one ``_Option`` or ``_Switch`` entry per
    clustalw argument (each registered under four spellings: dashed and
    undashed, upper and lower case) and then delegates to
    ``AbstractCommandline.__init__``.

    Arguments:
     - cmd - forwarded unchanged to the base class; defaults to "clustalw"
       (presumably the executable name or path - confirm against
       Bio.Application).
     - kwargs - forwarded unchanged to the base class.

    Each ``_Option`` is built positionally as (names, types, checker,
    False, description, flag).  The checker is either None or a callable
    taking the candidate value and returning a truth value.
    """
    # --- value checkers shared by many options -------------------------

    def is_int(value):
        # The builtin ``int`` is the same object as ``types.IntType`` on
        # Python 2, and unlike it also exists on Python 3.
        return isinstance(value, int)

    def is_number(value):
        return isinstance(value, (int, float))

    def one_of(*upper_case):
        # Accept each keyword in all-upper or all-lower case (not mixed).
        # A list, not a set, so unhashable values are rejected quietly.
        allowed = list(upper_case) + [word.lower() for word in upper_case]
        return lambda value: value in allowed

    def one_of_or_file(*upper_case):
        # Accept a known keyword, otherwise require an existing path.
        is_keyword = one_of(*upper_case)
        return lambda value: is_keyword(value) or os.path.exists(value)

    self.parameters = [
        # --- Data (sequences) ------------------------------------------
        _Option(["-infile", "-INFILE", "INFILE", "infile"],
                ["input", "file"],
                None,
                False,
                "Input sequences.",
                True),
        _Option(["-profile1", "-PROFILE1", "PROFILE1", "profile1"],
                ["input", "file"],
                None,
                False,
                "Profiles (old alignment).",
                True),
        _Option(["-profile2", "-PROFILE2", "PROFILE2", "profile2"],
                ["input", "file"],
                None,
                False,
                "Profiles (old alignment).",
                True),

        # --- Verbs (do things) -----------------------------------------
        _Switch(["-options", "-OPTIONS", "OPTIONS", "options"],
                ["input"],
                "List the command line parameters"),
        _Switch(["-help", "-HELP", "HELP", "help"],
                ["input"],
                "Outline the command line params."),
        _Switch(["-check", "-CHECK", "CHECK", "check"],
                ["input"],
                "Outline the command line params."),
        _Switch(["-fullhelp", "-FULLHELP", "FULLHELP", "fullhelp"],
                ["input"],
                "Output full help content."),
        _Switch(["-align", "-ALIGN", "ALIGN", "align"],
                ["input"],
                "Do full multiple alignment."),
        _Switch(["-tree", "-TREE", "TREE", "tree"],
                ["input"],
                "Calculate NJ tree."),
        _Option(["-bootstrap", "-BOOTSTRAP", "BOOTSTRAP", "bootstrap"],
                ["input"],
                is_int,
                False,
                "Bootstrap a NJ tree (n= number of bootstraps; def. = 1000).",
                True),
        _Switch(["-convert", "-CONVERT", "CONVERT", "convert"],
                ["input"],
                "Output the input sequences in a different file format."),

        # --- General settings ------------------------------------------
        _Switch(["-quicktree", "-QUICKTREE", "QUICKTREE", "quicktree"],
                ["input"],
                "Use FAST algorithm for the alignment guide tree"),
        _Option(["-type", "-TYPE", "TYPE", "type"],
                ["input"],
                one_of("PROTEIN", "DNA"),
                False,
                "PROTEIN or DNA sequences",
                True),
        _Switch(["-negative", "-NEGATIVE", "NEGATIVE", "negative"],
                ["input"],
                "Protein alignment with negative values in matrix"),
        _Option(["-outfile", "-OUTFILE", "OUTFILE", "outfile"],
                ["input", "file"],
                None,
                False,
                "Output sequence alignment file name",
                True),
        _Option(["-output", "-OUTPUT", "OUTPUT", "output"],
                ["input"],
                one_of("GCG", "GDE", "PHYLIP", "PIR", "NEXUS"),
                False,
                "Output format: GCG, GDE, PHYLIP, PIR or NEXUS",
                True),
        _Option(["-outorder", "-OUTORDER", "OUTORDER", "outorder"],
                ["input"],
                one_of("INPUT", "ALIGNED"),
                False,
                "Output taxon order: INPUT or ALIGNED",
                True),
        _Option(["-case", "-CASE", "CASE", "case"],
                ["input"],
                one_of("UPPER", "LOWER"),
                False,
                "LOWER or UPPER (for GDE output only)",
                True),
        _Option(["-seqnos", "-SEQNOS", "SEQNOS", "seqnos"],
                ["input"],
                one_of("ON", "OFF"),
                False,
                "OFF or ON (for Clustal output only)",
                True),
        _Option(["-seqno_range", "-SEQNO_RANGE", "SEQNO_RANGE",
                 "seqno_range"],
                ["input"],
                one_of("ON", "OFF"),
                False,
                "OFF or ON (NEW- for all output formats)",
                True),
        _Option(["-range", "-RANGE", "RANGE", "range"],
                ["input"],
                None,
                False,
                "Sequence range to write starting m to m+n. "
                "Input as string eg. '24,200'",
                True),
        _Option(["-maxseqlen", "-MAXSEQLEN", "MAXSEQLEN", "maxseqlen"],
                ["input"],
                # Was ``ininstance(...)``: a NameError on first use.
                is_int,
                False,
                "Maximum allowed input sequence length",
                True),
        _Switch(["-quiet", "-QUIET", "QUIET", "quiet"],
                ["input"],
                "Reduce console output to minimum"),
        _Switch(["-stats", "-STATS", "STATS", "stats"],
                ["input"],
                "Log some alignments statistics to file"),

        # --- Fast pairwise alignments ----------------------------------
        _Option(["-ktuple", "-KTUPLE", "KTUPLE", "ktuple"],
                ["input"],
                is_number,
                False,
                "Word size",
                True),
        _Option(["-topdiags", "-TOPDIAGS", "TOPDIAGS", "topdiags"],
                ["input"],
                is_number,
                False,
                "Number of best diags.",
                True),
        _Option(["-window", "-WINDOW", "WINDOW", "window"],
                ["input"],
                is_number,
                False,
                "Window around best diags.",
                True),
        _Option(["-pairgap", "-PAIRGAP", "PAIRGAP", "pairgap"],
                ["input"],
                is_number,
                False,
                "Gap penalty",
                True),
        _Option(["-score", "-SCORE", "SCORE", "score"],
                ["input"],
                one_of("PERCENT", "ABSOLUTE"),
                False,
                "Either: PERCENT or ABSOLUTE",
                True),

        # --- Slow pairwise alignments ----------------------------------
        _Option(["-pwmatrix", "-PWMATRIX", "PWMATRIX", "pwmatrix"],
                ["input"],
                one_of_or_file("BLOSUM", "PAM", "GONNET", "ID"),
                False,
                "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                True),
        _Option(["-pwdnamatrix", "-PWDNAMATRIX", "PWDNAMATRIX",
                 "pwdnamatrix"],
                ["input"],
                one_of_or_file("IUB", "CLUSTALW"),
                False,
                "DNA weight matrix=IUB, CLUSTALW or filename",
                True),
        _Option(["-pwgapopen", "-PWGAPOPEN", "PWGAPOPEN", "pwgapopen"],
                ["input"],
                is_number,
                False,
                "Gap opening penalty",
                True),
        _Option(["-pwgapext", "-PWGAPEXT", "PWGAPEXT", "pwgapext"],
                ["input"],
                is_number,
                False,
                # Previously mislabelled "Gap opening penalty".
                "Gap extension penalty",
                True),

        # --- Multiple alignments ---------------------------------------
        _Option(["-newtree", "-NEWTREE", "NEWTREE", "newtree"],
                ["output", "file"],
                None,
                False,
                "Output file name for newly created guide tree",
                True),
        _Option(["-usetree", "-USETREE", "USETREE", "usetree"],
                ["input", "file"],
                # Was ``lambda x: os.path.exists`` which returned the
                # function itself (always true) instead of calling it.
                os.path.exists,
                False,
                "File name of guide tree",
                True),
        _Option(["-matrix", "-MATRIX", "MATRIX", "matrix"],
                ["input"],
                one_of_or_file("BLOSUM", "PAM", "GONNET", "ID"),
                False,
                "Protein weight matrix=BLOSUM, PAM, GONNET, ID or filename",
                True),
        _Option(["-dnamatrix", "-DNAMATRIX", "DNAMATRIX", "dnamatrix"],
                ["input"],
                one_of_or_file("IUB", "CLUSTALW"),
                False,
                "DNA weight matrix=IUB, CLUSTALW or filename",
                True),
        _Option(["-gapopen", "-GAPOPEN", "GAPOPEN", "gapopen"],
                ["input"],
                is_number,
                False,
                "Gap opening penalty",
                True),
        _Option(["-gapext", "-GAPEXT", "GAPEXT", "gapext"],
                ["input"],
                is_number,
                False,
                "Gap extension penalty",
                True),
        _Switch(["-endgaps", "-ENDGAPS", "ENDGAPS", "endgaps"],
                ["input"],
                "No end gap separation pen."),
        _Option(["-gapdist", "-GAPDIST", "GAPDIST", "gapdist"],
                ["input"],
                is_number,
                False,
                "Gap separation pen. range",
                # NOTE(review): every other option passes True here; kept
                # as False to preserve the generated command line -
                # confirm whether this difference is intentional.
                False),
        _Switch(["-nopgap", "-NOPGAP", "NOPGAP", "nopgap"],
                ["input"],
                "Residue-specific gaps off"),
        _Switch(["-nohgap", "-NOHGAP", "NOHGAP", "nohgap"],
                ["input"],
                "Hydrophilic gaps off"),
        _Switch(["-hgapresidues", "-HGAPRESIDUES", "HGAPRESIDUES",
                 "hgapresidues"],
                ["input"],
                "List hydrophilic res."),
        _Option(["-maxdiv", "-MAXDIV", "MAXDIV", "maxdiv"],
                ["input"],
                is_number,
                False,
                "% ident. for delay",
                True),
        _Option(["-transweight", "-TRANSWEIGHT", "TRANSWEIGHT",
                 "transweight"],
                ["input"],
                is_number,
                False,
                "Transitions weighting",
                True),
        _Option(["-iteration", "-ITERATION", "ITERATION", "iteration"],
                ["input"],
                one_of("NONE", "TREE", "ALIGNMENT"),
                False,
                "NONE or TREE or ALIGNMENT",
                True),
        _Option(["-numiter", "-NUMITER", "NUMITER", "numiter"],
                ["input"],
                is_int,
                False,
                "maximum number of iterations to perform",
                # NOTE(review): False here as for -gapdist above; kept
                # unchanged - confirm whether this is intentional.
                False),
        _Switch(["-noweights", "-NOWEIGHTS", "NOWEIGHTS", "noweights"],
                ["input"],
                "Disable sequence weighting"),

        # --- Profile alignments ----------------------------------------
        _Switch(["-profile", "-PROFILE", "PROFILE", "profile"],
                ["input"],
                "Merge two alignments by profile alignment"),
        _Option(["-newtree1", "-NEWTREE1", "NEWTREE1", "newtree1"],
                ["output", "file"],
                None,
                False,
                "Output file name for new guide tree of profile1",
                True),
        _Option(["-newtree2", "-NEWTREE2", "NEWTREE2", "newtree2"],
                ["output", "file"],
                None,
                False,
                "Output file for new guide tree of profile2",
                True),
        _Option(["-usetree1", "-USETREE1", "USETREE1", "usetree1"],
                ["input", "file"],
                os.path.exists,  # same fix as for -usetree
                False,
                "File name of guide tree for profile1",
                True),
        _Option(["-usetree2", "-USETREE2", "USETREE2", "usetree2"],
                ["input", "file"],
                os.path.exists,  # same fix as for -usetree
                False,
                "File name of guide tree for profile2",
                True),

        # --- Sequence to profile alignments ----------------------------
        _Switch(["-sequences", "-SEQUENCES", "SEQUENCES", "sequences"],
                ["input"],
                "Sequentially add profile2 sequences to profile1 alignment"),
        _Switch(["-nosecstr1", "-NOSECSTR1", "NOSECSTR1", "nosecstr1"],
                ["input"],
                "Do not use secondary structure-gap penalty mask "
                "for profile 1"),
        _Switch(["-nosecstr2", "-NOSECSTR2", "NOSECSTR2", "nosecstr2"],
                ["input"],
                "Do not use secondary structure-gap penalty mask "
                "for profile 2"),

        # --- Structure alignments --------------------------------------
        _Option(["-secstrout", "-SECSTROUT", "SECSTROUT", "secstrout"],
                ["input"],
                one_of("STRUCTURE", "MASK", "BOTH", "NONE"),
                False,
                "STRUCTURE or MASK or BOTH or NONE output in alignment file",
                True),
        _Option(["-helixgap", "-HELIXGAP", "HELIXGAP", "helixgap"],
                ["input"],
                is_number,
                False,
                "Gap penalty for helix core residues",
                True),
        _Option(["-strandgap", "-STRANDGAP", "STRANDGAP", "strandgap"],
                ["input"],
                is_number,
                False,
                "gap penalty for strand core residues",
                True),
        _Option(["-loopgap", "-LOOPGAP", "LOOPGAP", "loopgap"],
                ["input"],
                is_number,
                False,
                "Gap penalty for loop regions",
                True),
        _Option(["-terminalgap", "-TERMINALGAP", "TERMINALGAP",
                 "terminalgap"],
                ["input"],
                is_number,
                False,
                "Gap penalty for structure termini",
                True),
        _Option(["-helixendin", "-HELIXENDIN", "HELIXENDIN", "helixendin"],
                ["input"],
                is_int,
                False,
                "Number of residues inside helix to be treated as terminal",
                True),
        _Option(["-helixendout", "-HELIXENDOUT", "HELIXENDOUT",
                 "helixendout"],
                ["input"],
                is_int,
                False,
                "Number of residues outside helix to be treated as terminal",
                True),
        _Option(["-strandendin", "-STRANDENDIN", "STRANDENDIN",
                 "strandendin"],
                ["input"],
                is_int,
                False,
                "Number of residues inside strand to be treated as terminal",
                True),
        _Option(["-strandendout", "-STRANDENDOUT", "STRANDENDOUT",
                 "strandendout"],
                ["input"],
                is_int,
                False,
                "number of residues outside strand to be treated as terminal",
                True),

        # --- Trees -----------------------------------------------------
        _Option(["-outputtree", "-OUTPUTTREE", "OUTPUTTREE", "outputtree"],
                ["input"],
                one_of("NJ", "PHYLIP", "DIST", "NEXUS"),
                False,
                "nj OR phylip OR dist OR nexus",
                True),
        _Option(["-seed", "-SEED", "SEED", "seed"],
                ["input"],
                is_int,
                False,
                "Seed number for bootstraps.",
                True),
        _Switch(["-kimura", "-KIMURA", "KIMURA", "kimura"],
                ["input"],
                "Use Kimura's correction."),
        _Switch(["-tossgaps", "-TOSSGAPS", "TOSSGAPS", "tossgaps"],
                ["input"],
                "Ignore positions with gaps."),
        _Option(["-bootlabels", "-BOOTLABELS", "BOOTLABELS", "bootlabels"],
                ["input"],
                one_of("NODE", "BRANCH"),
                False,
                "Node OR branch position of bootstrap values in tree display",
                True),
        _Option(["-clustering", "-CLUSTERING", "CLUSTERING", "clustering"],
                ["input"],
                one_of("NJ", "UPGMA"),
                False,
                "NJ or UPGMA",
                True),
    ]
    # The parameter list must be in place before the base initialiser runs.
    AbstractCommandline.__init__(self, cmd, **kwargs)
439