1
2
3
4
5
6
7 """Definitions for interacting with Blast related applications.
8 """
9 from Bio.Application import _Option, AbstractCommandline
10
12 """Create a commandline for the fasta program from NCBI.
13
14 """
def __init__(self, cmd="fastacmd", **kwargs):
    """Declare the fastacmd options and initialise the command line.

    Arguments:
     - cmd - name or path of the executable (default "fastacmd").
     - kwargs - forwarded unchanged to AbstractCommandline.__init__.

    Two required options are declared: -d (database) and
    -s (search_string).
    """
    # Fix: both options now pass the trailing False that the BLAST option
    # tables in this file pass to _Option.
    # NOTE(review): presumably that sixth argument is _Option's "equate"
    # flag, so False renders "-d value" instead of "-d=value" - confirm
    # against Bio.Application before relying on this description.
    self.parameters = [
        _Option(["-d", "database"], ["input"], None, 1,
                "The database to retrieve from.", False),
        _Option(["-s", "search_string"], ["input"], None, 1,
                "The id to search for.", False),
    ]
    AbstractCommandline.__init__(self, cmd, **kwargs)
24
25
27 """Base Commandline object for NCBI BLAST wrappers (PRIVATE).
28
29 This is provided for subclassing, it deals with shared options
30 common to all the BLAST tools (blastall, rpsblast, pgpblast).
31 """
# Body of the shared BLAST initialiser: insist on an explicit executable
# name, build the options common to the BLAST tools, put them in front of
# any options already stored on self.parameters, then hand over to
# AbstractCommandline.__init__.
assert cmd is not None

# One row per option: (switch and aliases, type tags, is_required, help).
common_specs = [
    (["-d", "database"], ["input"], 1,
     "The database to BLAST against."),
    (["-i", "infile"], ["input", "file"], 1,
     "The sequence to search with."),
    (["-e", "expectation"], ["input"], 0,
     "Expectation value cutoff."),
    (["-m", "align_view"], ["input"], 0,
     "Alignment view. Integer 0-11. Use 7 for XML output."),
    (["-o", "align_outfile", "outfile"], ["output", "file"], 0,
     "Output file for alignment."),
    (["-y", "xdrop_extension"], ["input"], 0,
     "Dropoff for blast extensions."),
    (["-F", "filter"], ["input"], 0,
     "Filter query sequence with SEG? T/F"),
    (["-X", "xdrop"], ["input"], 0,
     "Dropoff value (bits) for gapped alignments."),
    (["-I", "show_gi"], ["input"], 0,
     "Show GI's in deflines? T/F"),
    (["-J", "believe_query"], ["input"], 0,
     "Believe the query defline? T/F"),
    (["-Z", "xdrop_final"], ["input"], 0,
     "X dropoff for final gapped alignment."),
    (["-z", "db_length"], ["input"], 0,
     "Effective database length."),
    (["-O", "seqalign_file"], ["output", "file"], 0,
     "seqalign file to output."),
    (["-v", "descriptions"], ["input"], 0,
     "Number of one-line descriptions."),
    (["-b", "alignments"], ["input"], 0,
     "Number of alignments."),
    (["-Y", "search_length"], ["input"], 0,
     "Effective length of search space (use zero for the "
     "real size)."),
    (["-T", "html"], ["input"], 0,
     "Produce HTML output? T/F"),
    (["-U", "case_filter"], ["input"], 0,
     "Use lower case filtering of FASTA sequence? T/F"),
    (["-a", "nprocessors"], ["input"], 0,
     "Number of processors to use."),
    (["-g", "gapped"], ["input"], 0,
     "Whether to do a gapped alignment. T/F"),
]

# Fix: "-X" (xdrop) used to be the only entry built without the trailing
# False; building every option through one expression keeps them uniform.
# NOTE(review): presumably the sixth argument is _Option's "equate" flag,
# so False yields "-X value" rather than "-X=value" - confirm against
# Bio.Application.
extra_parameters = [
    _Option(switches, kinds, None, required, help_text, False)
    for switches, kinds, required, help_text in common_specs
]

# A subclass may have stored its own options before calling in here; the
# shared options go first, the subclass options follow.
if hasattr(self, "parameters"):
    self.parameters = extra_parameters + self.parameters
else:
    self.parameters = extra_parameters
AbstractCommandline.__init__(self, cmd, **kwargs)
87
88
90 """Base Commandline object for NCBI BLAST wrappers (PRIVATE).
91
92 This is provided for subclassing, it deals with shared options
93 common to all the blastall and pgpblast tools (but not rpsblast).
94 """
# Body of the blastall/blastpgp initialiser: require an executable name,
# build the options these two tools share, place them ahead of whatever
# the subclass already stored on self.parameters, then continue with
# _BlastCommandLine.__init__.
assert cmd is not None

# One row per option: (switch and aliases, help text). Every entry here
# is an optional ["input"] option.
shared_specs = (
    (["-G", "gap_open"], "Gap open penalty"),
    (["-E", "gap_extend"], "Gap extension penalty"),
    (["-A", "window_size"], "Multiple hits window size"),
    (["-f", "hit_extend"], "Threshold for extending hits."),
    (["-K", "keep_hits"], " Number of best hits from a region to keep."),
    (["-W", "wordsize"], "Word size"),
    (["-P", "passes"],
     "Hits/passes. Integer 0-2. 0 for multiple hit, "
     "1 for single hit (does not apply to blastn)"),
)
shared_options = [
    _Option(switches, ["input"], None, 0, help_text, False)
    for switches, help_text in shared_specs
]

# Shared options come first; subclass options (if any) follow.
if hasattr(self, "parameters"):
    self.parameters = shared_options + self.parameters
else:
    self.parameters = shared_options
_BlastCommandLine.__init__(self, cmd, **kwargs)
122
123
125 """Create a commandline for the blastall program from NCBI."""
126
def __init__(self, cmd="blastall", **kwargs):
    """Declare the blastall-specific options and initialise the wrapper.

    Arguments:
     - cmd - name or path of the executable (default "blastall").
     - kwargs - forwarded unchanged to _BlastAllOrPgpCommandLine.__init__.

    The options listed here are stored on self.parameters before the
    base-class initialiser is called.
    """
    plain = ["input"]
    # One row per option: (switch and aliases, type tags, is_required, help).
    # NOTE(review): the multi-line help texts keep the line breaks exactly
    # as they appear in the reviewed text; any interior indentation the
    # original literals carried was not visible - confirm if it matters.
    specs = (
        (["-p", "program"], plain, 1,
         "The blast program to use (e.g. blastp, blastn)."),
        (["-q", "nuc_mismatch"], plain, 0,
         "Penalty for a nucleotide mismatch (blastn only)."),
        (["-r", "nuc_match"], plain, 0,
         "Reward for a nucleotide match (blastn only)."),
        (["-Q", "query_genetic_code"], plain, 0,
         "Query Genetic code to use."),
        (["-D", "db_genetic_code"], plain, 0,
         "DB Genetic code (for tblast[nx] only)."),
        (["-M", "matrix"], plain, 0,
         "Matrix to use"),
        (["-S", "strands"], plain, 0,
         "Query strands to search against database (for blast[nx], "
         "and tblastx). 3 is both, 1 is top, 2 is bottom."),
        (["-l", "restrict_gi"], plain, 0,
         "Restrict search of database to list of GI's."),
        (["-R"], ["input", "file"], 0,
         "PSI-TBLASTN checkpoint input file."),
        (["-n", "megablast"], plain, 0,
         "MegaBlast search T/F."),
        (["-L", "region_length", "range_restriction"], plain, 0,
         "Location on query sequence (string format start,end).\n"
         "\n"
         "In older versions of BLAST, -L set the length of region\n"
         "used to judge hits (see -K parameter)."),
        (["-w"], plain, 0,
         "Frame shift penalty (OOF algorithm for blastx)."),
        (["-t"], plain, 0,
         "Length of the largest intron allowed in a translated "
         "nucleotide sequence when linking multiple distinct "
         "alignments. (0 invokes default behavior; a negative value "
         "disables linking.)"),
        (["-B"], plain, 0,
         "Number of concatenated queries, for blastn and tblastn."),
        (["-V", "oldengine"], plain, 0,
         "Force use of the legacy BLAST engine."),
        (["-C"], plain, 0,
         "Use composition-based statistics for tblastn:\n"
         "D or d: default (equivalent to F)\n"
         "0 or F or f: no composition-based statistics\n"
         "1 or T or t: Composition-based statistics as in "
         "NAR 29:2994-3005, 2001\n"
         "2: Composition-based score adjustment as in Bioinformatics\n"
         "21:902-911, 2005, conditioned on sequence properties\n"
         "3: Composition-based score adjustment as in Bioinformatics\n"
         "21:902-911, 2005, unconditionally\n"
         "For programs other than tblastn, must either be absent or be\n"
         "D, F or 0."),
        (["-s"], plain, 0,
         "Compute locally optimal Smith-Waterman alignments (This "
         "option is only available for gapped tblastn.) T/F"),
    )
    # list(kinds) gives each option its own type-tag list, as separate
    # list literals did.
    self.parameters = [
        _Option(switches, list(kinds), None, required, help_text, False)
        for switches, kinds, required, help_text in specs
    ]
    _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
189
190
192 """Create a commandline for the blastpgp program from NCBI."""
def __init__(self, cmd="blastpgp", **kwargs):
    """Declare the blastpgp-specific options and initialise the wrapper.

    Arguments:
     - cmd - name or path of the executable (default "blastpgp").
     - kwargs - forwarded unchanged to _BlastAllOrPgpCommandLine.__init__.

    The options listed here are stored on self.parameters before the
    base-class initialiser is called.
    """
    # One row per option: (switch and aliases, type tags, is_required, help).
    specs = (
        (["-C", "checkpoint_outfile"], ["output", "file"], 0,
         "Output file for PSI-BLAST checkpointing."),
        (["-R", "restart_infile"], ["input", "file"], 0,
         "Input file for PSI-BLAST restart."),
        (["-k", "hit_infile"], ["input", "file"], 0,
         "Hit file for PHI-BLAST."),
        (["-Q", "matrix_outfile"], ["output", "file"], 0,
         "Output file for PSI-BLAST matrix in ASCII."),
        (["-B", "align_infile"], ["input", "file"], 0,
         "Input alignment file for PSI-BLAST restart."),
        (["-S", "required_start"], ["input"], 0,
         "Start of required region in query."),
        (["-H", "required_end"], ["input"], 0,
         "End of required region in query."),
        (["-j", "npasses"], ["input"], 0,
         "Number of passes"),
        (["-N", "nbits_gapping"], ["input"], 0,
         "Number of bits to trigger gapping."),
        (["-c", "pseudocounts"], ["input"], 0,
         "Pseudocounts constants for multiple passes."),
        (["-h", "model_threshold"], ["input"], 0,
         "E-value threshold to include in multipass model."),
        (["-L", "region_length"], ["input"], 0,
         "Cost to decline alignment (disabled when zero)."),
        (["-M", "matrix"], ["input"], 0,
         "Matrix (string, default BLOSUM62)."),
        (["-p", "program"], ["input"], 1,
         "The blast program to use (e.g blastpgp, patseedp or seedp)."),
    )
    self.parameters = [
        _Option(switches, kinds, None, required, help_text, False)
        for switches, kinds, required, help_text in specs
    ]
    _BlastAllOrPgpCommandLine.__init__(self, cmd, **kwargs)
226
227
229 """Create a commandline for the rpsblast program from NCBI."""
def __init__(self, cmd="rpsblast", **kwargs):
    """Declare the rpsblast-specific options and initialise the wrapper.

    Arguments:
     - cmd - name or path of the executable (default "rpsblast").
     - kwargs - forwarded unchanged to _BlastCommandLine.__init__.

    The options listed here are stored on self.parameters before the
    base-class initialiser is called.
    """
    # One row per option: (switch and aliases, type tags, help text).
    # All of them are optional.
    specs = (
        (["-N", "nbits_gapping"], ["input"],
         "Number of bits to trigger gapping."),
        (["-P", "multihit"], ["input"],
         "0 for multiple hit, 1 for single hit"),
        (["-l", "logfile"], ["output", "file"],
         "Logfile name."),
        (["-p", "protein"], ["input"],
         "Query sequence is protein. T/F"),
        (["-L", "range_restriction"], ["input"],
         "Location on query sequence (string format start,end)."),
    )
    self.parameters = [
        _Option(switches, kinds, None, 0, help_text, False)
        for switches, kinds, help_text in specs
    ]
    _BlastCommandLine.__init__(self, cmd, **kwargs)
248