Package Bio :: Package Application
[hide private]
[frames | no frames]

Source Code for Package Bio.Application

  1  # Copyright 2001-2004 Brad Chapman. 
  2  # Revisions copyright 2009 by Peter Cock. 
  3  # All rights reserved. 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7  """General mechanisms to access applications in Biopython. 
  8  """ 
  9  import os, sys 
 10  import StringIO 
 11  import subprocess 
 12   
 13  from Bio import File 
 14   
def generic_run(commandline):
    """Execute a pre-built command line and collect its output (OBSOLETE).

    The commandline argument is expected to derive from AbstractCommandline;
    it is turned into a string with str() before being launched.

    Returns a tuple of three items: an ApplicationResult object built from
    the command line and the child's return code, followed by two
    File.UndoHandle objects wrapping the captured standard output and
    standard error.

    WARNING - The full program output is read into memory!  This may be an
    issue when the program writes a large amount of data to standard output.

    NOTE - This function is considered to be obsolete, and we intend to
    deprecate it and then remove it in future releases of Biopython.
    We now recommend you invoke subprocess directly, using str(commandline)
    to turn an AbstractCommandline wrapper into a command line string. This
    will give you full control of the tool's input and output as well.
    """
    # The command string goes through the shell everywhere except Windows.
    use_shell = sys.platform != "win32"
    # No input is piped to the child, but the standard input pipe is set up
    # anyway as a work around for a python bug if this is called from a
    # Windows GUI program.  For details, see
    # http://bugs.python.org/issue1124861
    process = subprocess.Popen(str(commandline),
                               stdin=subprocess.PIPE,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE,
                               shell=use_shell)
    # .communicate() is used as .wait() might deadlock, see Bug 2804/2806
    stdout_data, stderr_data = process.communicate()
    # The return code is only available once the child has finished.
    result = ApplicationResult(commandline, process.returncode)
    stdout_handle = File.UndoHandle(StringIO.StringIO(stdout_data))
    stderr_handle = File.UndoHandle(StringIO.StringIO(stderr_data))
    return result, stdout_handle, stderr_handle
49
class ApplicationResult(object):
    """Make results of a program available through a standard interface (OBSOLETE).

    This tries to pick up output information available from the program
    and make it available programmatically.

    NOTE - This class is considered to be obsolete, and we intend to
    deprecate it and then remove it in future releases of Biopython.
    """

    def __init__(self, application_cl, return_code):
        """Initialize with the commandline from the program.

        Arguments:

        o application_cl -- the command line object that was run; its
        parameters attribute is scanned for output file parameters.

        o return_code -- the return code of the application, stored as
        the return_code attribute.
        """
        self._cl = application_cl

        # provide the return code of the application
        self.return_code = return_code

        # get the application dependent results we can provide
        # right now the only results we handle are output files
        self._results = {}
        for parameter in self._cl.parameters:
            is_output_file = ("file" in parameter.param_types
                              and "output" in parameter.param_types)
            if is_output_file and parameter.is_set:
                # Results are keyed by the last (human readable) name
                self._results[parameter.names[-1]] = parameter.value

    def get_result(self, output_name):
        """Retrieve result information for the given output.

        Supports any of the defined parameters aliases (assuming the
        parameter is defined as an output).

        Raises KeyError if no result is stored under the given name or
        under the parameter it is an alias of.
        """
        try:
            return self._results[output_name]
        except KeyError:
            # Try the aliases...
            for parameter in self._cl.parameters:
                if output_name in parameter.names:
                    return self._results[parameter.names[-1]]
            # No, really was a key error; a bare raise keeps the
            # original traceback intact.
            raise

    def available_results(self):
        """Retrieve a sorted list of all available result names."""
        return sorted(self._results)
99
class AbstractCommandline(object):
    """Generic interface for constructing command line strings.

    This class shouldn't be called directly; it should be subclassed to
    provide an implementation for a specific application.

    For a usage example we'll show one of the EMBOSS wrappers. You can set
    options when creating the wrapper object using keyword arguments - or later
    using their corresponding properties:

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> cline
    WaterCommandline(cmd='water', gapopen=10, gapextend=0.5)

    You can instead manipulate the parameters via their properties, e.g.

    >>> cline.gapopen
    10
    >>> cline.gapopen = 20
    >>> cline
    WaterCommandline(cmd='water', gapopen=20, gapextend=0.5)

    You can clear a parameter you have already added by 'deleting' the
    corresponding property:

    >>> del cline.gapopen
    >>> cline.gapopen
    >>> cline
    WaterCommandline(cmd='water', gapextend=0.5)

    Once you have set the parameters you need, turn the object into a string:

    >>> str(cline)
    Traceback (most recent call last):
    ...
    ValueError: You must either set outfile (output filename), or enable filter or stdout (output to stdout).

    In this case the wrapper knows certain arguments are required to construct
    a valid command line for the tool. For a complete example,

    >>> from Bio.Emboss.Applications import WaterCommandline
    >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
    >>> cline.asequence = "asis:ACCCGGGCGCGGT"
    >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
    >>> cline.outfile = "temp_water.txt"
    >>> print cline
    water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
    >>> cline
    WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)

    You would typically run the command line via a standard Python operating
    system call (e.g. using the subprocess module).
    """

    def __init__(self, cmd, **kwargs):
        """Create a new instance of a command line wrapper object.

        Arguments:

        o cmd -- the program name (or path), stored as program_name.

        o kwargs -- parameter names and values, each passed on to
        set_parameter.

        Raises AttributeError if the subclass has not defined
        self.parameters, and ValueError if a parameter alias is defined
        more than once or a keyword argument is rejected by set_parameter.
        """
        # Init method - should be subclassed!
        #
        # The subclass methods should look like this:
        #
        # def __init__(self, cmd="muscle", **kwargs) :
        #     self.parameters = [...]
        #     AbstractCommandline.__init__(self, cmd, **kwargs)
        #
        # i.e. There should have an optional argument "cmd" to set the location
        # of the executable (with a sensible default which should work if the
        # command is on the path on Unix), and keyword arguments.  It should
        # then define a list of parameters, all objects derived from the base
        # class _AbstractParameter.
        #
        # The keyword arguments should be any valid parameter name, and will
        # be used to set the associated parameter.
        self.program_name = cmd
        try:
            parameters = self.parameters
        except AttributeError:
            raise AttributeError("Subclass should have defined self.parameters")

        # Beware of binding-versus-assignment confusion issues: each factory
        # call gives the lambda its own binding of name, rather than all of
        # them sharing the loop variable below.
        def getter(name):
            return lambda x: x._get_parameter(name)

        def setter(name):
            return lambda x, value: x.set_parameter(name, value)

        def deleter(name):
            return lambda x: x._clear_parameter(name)

        # Create properties for each parameter at run time
        aliases = set()
        for p in parameters:
            for name in p.names:
                if name in aliases:
                    raise ValueError("Parameter alias %s multiply defined"
                                     % name)
                aliases.add(name)
            # The property is published under the last (human readable) name
            name = p.names[-1]
            doc = p.description
            if isinstance(p, _Switch):
                doc += "\n\nThis property controls the addition of the %s " \
                       "switch, treat this property as a boolean." % p.names[0]
            else:
                doc += "\n\nThis controls the addition of the %s parameter " \
                       "and its associated value. Set this property to the " \
                       "argument value required." % p.names[0]
            prop = property(getter(name), setter(name), deleter(name), doc)
            setattr(self.__class__, name, prop)  # magic!
        for key, value in kwargs.items():
            self.set_parameter(key, value)

    def _validate(self):
        """Make sure the required parameters have been set (PRIVATE).

        No return value - it either works or raises a ValueError.

        This is a separate method (called from __str__) so that subclasses may
        override it.
        """
        for p in self.parameters:
            # Check for missing required parameters:
            if p.is_required and not p.is_set:
                raise ValueError("Parameter %s is not set."
                                 % p.names[-1])
            # Also repeat the parameter validation here, just in case?

    def __str__(self):
        """Make the commandline string with the currently set options.

        e.g.
        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
        >>> cline.asequence = "asis:ACCCGGGCGCGGT"
        >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
        >>> cline.outfile = "temp_water.txt"
        >>> print cline
        water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
        >>> str(cline)
        'water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5'
        """
        self._validate()
        # Each set parameter renders itself including a trailing space
        pieces = [str(parameter) for parameter in self.parameters
                  if parameter.is_set]
        commandline = "%s %s" % (self.program_name, "".join(pieces))
        return commandline.strip()  # remove trailing space

    def __repr__(self):
        """Return a representation of the command line object for debugging.

        e.g.
        >>> from Bio.Emboss.Applications import WaterCommandline
        >>> cline = WaterCommandline(gapopen=10, gapextend=0.5)
        >>> cline.asequence = "asis:ACCCGGGCGCGGT"
        >>> cline.bsequence = "asis:ACCCGAGCGCGGT"
        >>> cline.outfile = "temp_water.txt"
        >>> print cline
        water -outfile=temp_water.txt -asequence=asis:ACCCGGGCGCGGT -bsequence=asis:ACCCGAGCGCGGT -gapopen=10 -gapextend=0.5
        >>> cline
        WaterCommandline(cmd='water', outfile='temp_water.txt', asequence='asis:ACCCGGGCGCGGT', bsequence='asis:ACCCGAGCGCGGT', gapopen=10, gapextend=0.5)
        """
        answer = "%s(cmd=%s" % (self.__class__.__name__,
                                repr(self.program_name))
        for parameter in self.parameters:
            if parameter.is_set:
                if isinstance(parameter, _Switch):
                    # Switches carry no value, show them as a boolean
                    answer += ", %s=True" % parameter.names[-1]
                else:
                    answer += ", %s=%s" \
                              % (parameter.names[-1], repr(parameter.value))
        answer += ")"
        return answer

    def _get_parameter(self, name):
        """Get a commandline option value.

        For a switch this is its is_set flag, otherwise the stored value.
        Raises ValueError if no parameter uses the given name.
        """
        for parameter in self.parameters:
            if name in parameter.names:
                if isinstance(parameter, _Switch):
                    return parameter.is_set
                else:
                    return parameter.value
        raise ValueError("Option name %s was not found." % name)

    def _clear_parameter(self, name):
        """Reset or clear a commandline option value.

        Raises ValueError if no parameter uses the given name.
        """
        cleared_option = False
        for parameter in self.parameters:
            if name in parameter.names:
                # Switches deliberately have no value attribute (their
                # __str__ asserts this), so only reset a value where one
                # already exists rather than creating it.
                if hasattr(parameter, "value"):
                    parameter.value = None
                parameter.is_set = False
                cleared_option = True
        if not cleared_option:
            raise ValueError("Option name %s was not found." % name)

    def set_parameter(self, name, value=None):
        """Set a commandline option for a program.

        For a switch the value is treated as a boolean (with a warning if
        it is None).  For any other parameter a value which is not None is
        first validated with the parameter's checker function.

        Raises ValueError if no parameter uses the given name, or if the
        value is rejected by the checker function.
        """
        set_option = False
        for parameter in self.parameters:
            if name in parameter.names:
                if isinstance(parameter, _Switch):
                    if value is None:
                        import warnings
                        warnings.warn("For a switch type argument like %s, "
                                      "we expect a boolean. None is treated "
                                      "as FALSE!" % parameter.names[-1])
                    parameter.is_set = bool(value)
                    set_option = True
                else:
                    if value is not None:
                        self._check_value(value, name,
                                          parameter.checker_function)
                    parameter.value = value
                    parameter.is_set = True
                    set_option = True
        if not set_option:
            raise ValueError("Option name %s was not found." % name)

    def _check_value(self, value, name, check_function):
        """Check whether the given value is valid.

        No return value - it either works or raises a ValueError.

        This uses the passed function 'check_function', which can either
        return a [0, 1] (bad, good) value or raise an error. Either way
        this function will raise an error if the value is not valid, or
        finish silently otherwise.
        """
        if check_function is not None:
            is_good = check_function(value)  # May raise an exception
            # Sanity check on the checker function itself
            assert is_good in [0, 1, True, False]
            if not is_good:
                raise ValueError("Invalid parameter value %r for parameter %s"
                                 % (value, name))
328
class _AbstractParameter:
    """Base class holding information about one commandline parameter.

    Not meant to be used directly; use one of the subclasses instead.
    """

    def __init__(self):
        # Subclasses provide their own constructor.
        raise NotImplementedError()

    def __str__(self):
        # Subclasses provide the commandline text for the parameter.
        raise NotImplementedError()
339
class _Option(_AbstractParameter):
    """Represent an option that can be set for a program.

    This holds UNIXish options like --append=yes and -a yes,
    where a value (here "yes") is generally expected.

    For UNIXish options like -kimura in clustalw which don't
    take a value, use the _Switch object instead.

    Attributes:

    o names -- a list of string names by which the parameter can be
    referenced (ie. ["-a", "--append", "append"]). The first name in
    the list is considered to be the one that goes on the commandline,
    for those parameters that print the option. The last name in the list
    is assumed to be a "human readable" name describing the option in one
    word.

    o param_types -- a list of string describing the type of parameter,
    which can help let programs know how to use it. Example descriptions
    include 'input', 'output', 'file'. Note that if 'file' is included,
    these argument values will automatically be escaped if the filename
    contains spaces.

    o checker_function -- a reference to a function that will determine
    if a given value is valid for this parameter. This function can either
    raise an error when given a bad value, or return a [0, 1] decision on
    whether the value is correct.

    o equate -- should an equals sign be inserted if a value is used?

    o description -- a description of the option.

    o is_required -- a flag to indicate if the parameter must be set for
    the program to be run.

    o is_set -- if the parameter has been set

    o value -- the value of a parameter
    """

    def __init__(self, names=None, types=None, checker_function=None,
                 is_required=False, description="", equate=True):
        """Store the option's configuration; it starts out unset.

        When names or types are omitted each instance gets its own new
        empty list, so that instances never share (and accidentally
        modify) a single default list object.
        """
        self.names = [] if names is None else names
        self.param_types = [] if types is None else types
        self.checker_function = checker_function
        self.description = description
        self.equate = equate
        self.is_required = is_required

        self.is_set = False
        self.value = None

    def __str__(self):
        """Return the value of this option for the commandline.

        Includes a trailing space.
        """
        # Note: Before equate was handled explicitly, the old
        # code would do either "--name " or "--name=value ",
        # or " -name " or " -name value ".  This choice is
        # now made explicitly when setting up the option.
        if self.value is None:
            return "%s " % self.names[0]
        if "file" in self.param_types:
            # Filenames containing spaces need quoting
            v = _escape_filename(self.value)
        else:
            v = str(self.value)
        if self.equate:
            return "%s=%s " % (self.names[0], v)
        else:
            return "%s %s " % (self.names[0], v)
411
class _Switch(_AbstractParameter):
    """Represent an optional argument switch for a program.

    This holds UNIXish options like -kimura in clustalw which don't
    take a value, they are either included in the command string
    or omitted.

    o names -- a list of string names by which the parameter can be
    referenced (ie. ["-a", "--append", "append"]). The first name in
    the list is considered to be the one that goes on the commandline,
    for those parameters that print the option. The last name in the list
    is assumed to be a "human readable" name describing the option in one
    word.

    o param_types -- a list of string describing the type of parameter,
    which can help let programs know how to use it. Example descriptions
    include 'input', 'output', 'file'. Note that if 'file' is included,
    these argument values will automatically be escaped if the filename
    contains spaces.

    o description -- a description of the option.

    o is_set -- if the parameter has been set

    NOTE - There is no value attribute, see is_set instead,
    """

    def __init__(self, names=None, types=None, description=""):
        """Store the switch's configuration; it starts out unset.

        When names or types are omitted each instance gets its own new
        empty list, so that instances never share (and accidentally
        modify) a single default list object.
        """
        self.names = [] if names is None else names
        self.param_types = [] if types is None else types
        self.description = description
        self.is_set = False
        # A switch is never required
        self.is_required = False

    def __str__(self):
        """Return the value of this option for the commandline.

        Includes a trailing space.
        """
        # Guard against a value attribute having been added by mistake
        assert not hasattr(self, "value")
        if self.is_set:
            return "%s " % self.names[0]
        else:
            return ""
455
class _Argument(_AbstractParameter):
    """Represent an argument on a commandline.

    Unlike an option, only the value itself is written to the
    commandline; none of the names are printed.

    Attributes:

    o names -- a list of string names by which the parameter can be
    referenced.

    o param_types -- a list of string describing the type of parameter.

    o checker_function -- a reference to a function used to check a
    value given for this parameter.

    o description -- a description of the argument.

    o is_required -- a flag to indicate if the parameter must be set.

    o is_set -- if the parameter has been set

    o value -- the value of a parameter
    """

    def __init__(self, names=None, types=None, checker_function=None,
                 is_required=False, description=""):
        """Store the argument's configuration; it starts out unset.

        When names or types are omitted each instance gets its own new
        empty list, so that instances never share (and accidentally
        modify) a single default list object.
        """
        self.names = [] if names is None else names
        self.param_types = [] if types is None else types
        self.checker_function = checker_function
        self.description = description
        self.is_required = is_required
        self.is_set = False
        self.value = None

    def __str__(self):
        """Return the value followed by a space, or a lone space if unset."""
        if self.value is None:
            return " "
        else:
            return "%s " % self.value
474
def _escape_filename(filename):
    """Wrap a filename containing spaces in double quotes (PRIVATE).

    A filename without spaces is returned unchanged, as is one which
    already starts and ends with a double quote:

    >>> print _escape_filename('example with spaces')
    "example with spaces"
    >>> print _escape_filename('"example with spaces"')
    "example with spaces"
    """
    # An idea considered but not implemented: on Windows, if the file
    # exists, win32api.GetShortPathName could supply its alternative short
    # name (DOS style 8.3 format) which has no spaces in it.  Note that
    # such a name is not portable between machines, or even folders.
    already_quoted = filename.startswith('"') and filename.endswith('"')
    if " " in filename and not already_quoted:
        # Just quote it - works on Windows, Mac OS X etc
        return '"%s"' % filename
    return filename
506
def _test():
    """Run the Bio.Application module's doctests.

    This simply hands over to doctest.testmod in verbose mode, so the
    examples are run from whatever the current directory happens to be.
    """
    from doctest import testmod
    testmod(verbose=1)

if __name__ == "__main__":
    # Run the doctests, but only when this file is executed directly as a
    # script rather than imported as a module.
    _test()