1
2
3
4
5
6 """
7 Bio.Application command line for the multiple alignment program DIALIGN2-2.
8
9 http://bibiserv.techfak.uni-bielefeld.de/dialign/welcome.html
10
11 Citations:
12
13 B. Morgenstern (2004). DIALIGN: Multiple DNA and Protein Sequence Alignment
14 at BiBiServ. Nucleic Acids Research 32, W33-W36.
15
16 Last checked against version: 2.2
17 """
18 import os
19 import types
20 from Bio import Application
21 from Bio.Application import _Option, _Argument, _Switch, AbstractCommandline
22
24 """Command line wrapper for the multiple alignment program DIALIGN2-2."""
def __init__(self, cmd="dialign2-2", **kwargs):
    """Declare the DIALIGN2-2 command line parameters and initialise.

    Arguments:
     - cmd - name (or path) of the executable; stored as
       ``program_name`` and passed on to the base class.
     - kwargs - forwarded untouched to ``AbstractCommandline.__init__``
       (presumably parameter values keyed by the second name of each
       entry below, e.g. ``thr=2`` -- confirm against Bio.Application).

    Each entry in ``self.parameters`` is built positionally.
    NOTE(review): the positional fields appear to follow the legacy
    Bio.Application layout -- ``_Switch(names, types, description)``,
    ``_Option(names, types, checker, required, description, equate)``
    and ``_Argument(names, types, checker, required, description)`` --
    confirm against the Bio.Application version in use.
    """
    def is_integer(value):
        # ``types.IntType`` only exists on Python 2, where it is simply
        # ``int``; testing against ``int`` is equivalent there and
        # keeps the checker usable on Python 3.
        return isinstance(value, int)

    self.program_name = cmd
    self.parameters = [
        _Switch(["-afc", "afc"], ["input"],
                "Creates additional output file '*.afc' "
                "containing data of all fragments considered "
                "for alignment WARNING: this file can be HUGE !"),
        _Switch(["-afc_v", "afc_v"], ["input"],
                "Like '-afc' but verbose: fragments are explicitly "
                "printed. WARNING: this file can be EVEN BIGGER !"),
        _Switch(["-anc", "anc"], ["input"],
                "Anchored alignment. Requires a file <seq_file>.anc "
                "containing anchor points."),
        _Switch(["-cs", "cs"], ["input"],
                "If segments are translated, not only the `Watson "
                "strand' but also the `Crick strand' is looked at."),
        _Switch(["-cw", "cw"], ["input"],
                "Additional output file in CLUSTAL W format."),
        _Switch(["-ds", "ds"], ["input"],
                "`dna alignment speed up' - non-translated nucleic acid "
                "fragments are taken into account only if they start "
                "with at least two matches. Speeds up DNA alignment at "
                "the expense of sensitivity."),
        _Switch(["-fa", "fa"], ["input"],
                "Additional output file in FASTA format."),
        _Switch(["-ff", "ff"], ["input"],
                "Creates file *.frg containing information about all "
                "fragments that are part of the respective optimal "
                "pairwise alignmnets plus information about "
                "consistency in the multiple alignment"),
        _Option(["-fn", "fn"], ["input"],
                None,
                0,
                "Output files are named <out_file>.<extension>.",
                0),
        _Switch(["-fop", "fop"], ["input"],
                "Creates file *.fop containing coordinates of all "
                "fragments that are part of the respective pairwise "
                "alignments."),
        _Switch(["-fsm", "fsm"], ["input"],
                "Creates file *.fsm containing coordinates of all "
                "fragments that are part of the final alignment"),
        _Switch(["-iw", "iw"], ["input"],
                "Overlap weights switched off (by default, overlap "
                "weights are used if up to 35 sequences are aligned). "
                "This option speeds up the alignment but may lead "
                "to reduced alignment quality."),
        _Switch(["-lgs", "lgs"], ["input"],
                "`long genomic sequences' - combines the following "
                "options: -ma, -thr 2, -lmax 30, -smin 8, -nta, -ff, "
                "-fop, -ff, -cs, -ds, -pst "),
        _Switch(["-lgs_t", "lgs_t"], ["input"],
                "Like '-lgs' but with all segment pairs assessed "
                "at the peptide level (rather than 'mixed alignments' "
                "as with the '-lgs' option). Therefore faster than "
                "-lgs but not very sensitive for non-coding regions."),
        _Option(["-lmax", "lmax"], ["input"],
                is_integer,
                0,
                "Maximum fragment length = x (default: x = 40 or "
                "x = 120 for `translated' fragments). Shorter x "
                "speeds up the program but may affect alignment quality.",
                0),
        _Switch(["-lo", "lo"], ["input"],
                "(Long Output) Additional file *.log with information "
                "about fragments selected for pairwise alignment and "
                "about consistency in multi-alignment proceedure."),
        _Switch(["-ma", "ma"], ["input"],
                "`mixed alignments' consisting of P-fragments and "
                "N-fragments if nucleic acid sequences are aligned."),
        _Switch(["-mask", "mask"], ["input"],
                "Residues not belonging to selected fragments are "
                "replaced by `*' characters in output alignment "
                "(rather than being printed in lower-case characters)"),
        _Switch(["-mat", "mat"], ["input"],
                "Creates file *mat with substitution counts derived "
                "from the fragments that have been selected for "
                "alignment."),
        # NOTE(review): the help text mentions a threshold "t", which
        # suggests this flag takes a value as well -- confirm against
        # the DIALIGN documentation before converting it to an _Option.
        _Switch(["-mat_thr", "mat_thr"], ["input"],
                "Like '-mat' but only fragments with weight score "
                "> t are considered"),
        _Switch(["-max_link", "max_link"], ["input"],
                "'maximum linkage' clustering used to construct "
                "sequence tree (instead of UPGMA)."),
        _Switch(["-min_link", "min_link"], ["input"],
                "'minimum linkage' clustering used."),
        _Option(["-mot", "mot"], ["input"],
                None,
                0,
                "'motif' option.",
                0),
        _Switch(["-msf", "msf"], ["input"],
                "Separate output file in MSF format."),
        _Switch(["-n", "n"], ["input"],
                "Input sequences are nucleic acid sequences. "
                "No translation of fragments."),
        _Switch(["-nt", "nt"], ["input"],
                "Input sequences are nucleic acid sequences and "
                "`nucleic acid segments' are translated to `peptide "
                "segments'."),
        _Switch(["-nta", "nta"], ["input"],
                "`no textual alignment' - textual alignment suppressed. "
                "This option makes sense if other output files are of "
                "intrest -- e.g. the fragment files created with -ff, "
                "-fop, -fsm or -lo."),
        _Switch(["-o", "o"], ["input"],
                "Fast version, resulting alignments may be slightly "
                "different."),
        _Switch(["-ow", "ow"], ["input"],
                "Overlap weights enforced (By default, overlap weights "
                "are used only if up to 35 sequences are aligned since "
                "calculating overlap weights is time consuming)."),
        _Switch(["-pst", "pst"], ["input"],
                "'print status'. Creates and updates a file *.sta with "
                "information about the current status of the program "
                "run. This option is recommended if large data sets "
                "are aligned since it allows the user to estimate the "
                "remaining running time."),
        # -smin carries a numeric value (the -lgs help above spells it
        # "-smin 8"), so it is declared like -lmax and -thr rather than
        # as a value-less switch that could never pass the threshold.
        _Option(["-smin", "smin"], ["input"],
                is_integer,
                0,
                "Minimum similarity value for first residue pair "
                "(or codon pair) in fragments. Speeds up protein "
                "alignment or alignment of translated DNA fragments "
                "at the expense of sensitivity.",
                0),
        _Option(["-stars", "stars"], ["input"],
                lambda x: x in range(0, 10),
                0,
                "Maximum number of `*' characters indicating degree "
                "of local similarity among sequences. By default, no "
                "stars are used but numbers between 0 and 9, instead.",
                0),
        _Switch(["-stdo", "stdo"], ["input"],
                "Results written to standard output."),
        _Switch(["-ta", "ta"], ["input"],
                "Standard textual alignment printed (overrides "
                "suppression of textual alignments in special "
                "options, e.g. -lgs)"),
        _Option(["-thr", "thr"], ["input"],
                is_integer,
                0,
                "Threshold T = x.",
                0),
        _Switch(["-xfr", "xfr"], ["input"],
                "'exclude fragments' - list of fragments can be "
                "specified that are NOT considered for pairwise "
                "alignment"),
        # The only entry flagged as required (fourth field is 1).
        _Argument(["input"], ["input", "file"], None, 1,
                  "Input file name. Must be FASTA format"),
    ]
    # The parameter list must exist before the base initialiser runs,
    # since kwargs are handed to it together with cmd.
    AbstractCommandline.__init__(self, cmd, **kwargs)
172