Package flumotion :: Package common :: Module bundle
[hide private]

Source Code for Module flumotion.common.bundle

  1  # -*- Mode: Python; test-case-name: flumotion.test.test_bundle -*- 
  2  # vi:si:et:sw=4:sts=4:ts=4 
  3  # 
  4  # Flumotion - a streaming media server 
  5  # Copyright (C) 2004,2005,2006,2007 Fluendo, S.L. (www.fluendo.com). 
  6  # All rights reserved. 
  7   
  8  # This file may be distributed and/or modified under the terms of 
  9  # the GNU General Public License version 2 as published by 
 10  # the Free Software Foundation. 
 11  # This file is distributed without any warranty; without even the implied 
 12  # warranty of merchantability or fitness for a particular purpose. 
 13  # See "LICENSE.GPL" in the source distribution for more information. 
 14   
 15  # Licensees having purchased or holding a valid Flumotion Advanced 
 16  # Streaming Server license may use this file in accordance with the 
 17  # Flumotion Advanced Streaming Server Commercial License Agreement. 
 18  # See "LICENSE.Flumotion" in the source distribution for more information. 
 19   
 20  # Headers in this file shall remain intact. 
 21   
 22  """ 
 23  bundles of files used to implement caching over the network 
 24  """ 
 25   
import errno
import hashlib
import io
import md5
import os
import StringIO
import sys
import tempfile
import zipfile

from flumotion.common import errors, dag
 35   
 36  __all__ = ['Bundle', 'Bundler', 'Unbundler', 'BundlerBasket'] 
 37   
class BundledFile:
    """
    I represent one file as managed by a bundler.

    @ivar source:      path of the file on disk
    @ivar destination: name the file is stored under inside the zip
    @ivar zipped:      whether L{pack} has been called at least once
    """

    # number of bytes read from disk at a time when checksumming
    _CHUNK_SIZE = 65536

    def __init__(self, source, destination):
        self.source = source
        self.destination = destination
        self._last_md5sum = None
        self._last_timestamp = None
        self.zipped = False

    def md5sum(self):
        """
        Calculate the md5sum of the given file.

        The file is read in binary mode so the checksum covers the exact
        bytes on disk, and the handle is closed before returning.

        @returns: the md5 sum a 32 character string of hex characters.
        """
        digest = hashlib.md5()
        with open(self.source, "rb") as handle:
            for chunk in iter(lambda: handle.read(self._CHUNK_SIZE), b""):
                digest.update(chunk)
        return digest.hexdigest()

    def timestamp(self):
        """
        @returns: the last modified timestamp for the file.
        """
        return os.path.getmtime(self.source)

    def hasChanged(self):
        """
        Check if the file has changed since it was last checked.

        A file that was never packed is always reported as changed.
        Otherwise the md5sum is only recalculated when the modification
        time differs from the one seen last.

        @rtype: boolean
        """
        # if it wasn't zipped yet, it needs zipping, so we pretend it
        # was changed
        # FIXME: move this out here
        if not self.zipped:
            return True

        timestamp = self.timestamp()
        # Compare for equality rather than "not newer": a file replaced by
        # a copy carrying an older mtime has changed too.
        if timestamp == self._last_timestamp:
            return False
        self._last_timestamp = timestamp

        # the timestamp moved; only report a change if the contents did too
        md5sum = self.md5sum()
        if md5sum == self._last_md5sum:
            return False
        self._last_md5sum = md5sum
        return True

    def pack(self, zip):
        """
        Write the file into the given zip file and remember its current
        timestamp and md5sum as the reference for L{hasChanged}.

        @param zip: an open, writable zip file
        @type  zip: L{zipfile.ZipFile}
        """
        self._last_timestamp = self.timestamp()
        self._last_md5sum = self.md5sum()
        zip.write(self.source, self.destination)
        self.zipped = True
97
class Bundle:
    """
    I am a bundle of files, represented by a zip file and md5sum.

    @ivar name:   the name of the bundle
    @ivar zip:    the zip file's data, or None if not set yet
    @ivar md5sum: hex md5sum of the zip data, or None if not set yet
    """

    def __init__(self, name):
        self.zip = None
        self.md5sum = None
        self.name = name

    def setZip(self, zip):
        """
        Set the bundle to the given data representation of the zip file.

        The md5sum is recalculated from the new data.

        @param zip: the complete contents of a zip file
        """
        self.zip = zip
        # hashlib replaces the md5 module, deprecated since Python 2.5
        self.md5sum = hashlib.md5(zip).hexdigest()

    def getZip(self):
        """
        Get the bundle's zip data.
        """
        return self.zip
119
class Unbundler:
    """
    I unbundle bundles by unpacking them in the given directory
    under directories with the bundle's md5sum.
    """

    def __init__(self, directory):
        self._undir = directory

    def unbundlePathByInfo(self, name, md5sum):
        """
        Return the full path where a bundle with the given name and md5sum
        would be unbundled to.
        """
        return os.path.join(self._undir, name, md5sum)

    def unbundlePath(self, bundle):
        """
        Return the full path where this bundle will/would be unbundled to.
        """
        return self.unbundlePathByInfo(bundle.name, bundle.md5sum)

    def unbundle(self, bundle):
        """
        Unbundle the given bundle.

        The zip is checked completely before anything is written: a member
        failing its integrity check, or a member name that would end up
        outside of the unbundle directory, rejects the whole bundle.

        @type bundle: L{flumotion.common.bundle.Bundle}

        @rtype:   string
        @returns: the full path to the directory where it was unpacked
        @raises zipfile.BadZipfile: if the zip is corrupt or contains an
                                    unsafe member name
        """
        unbundleDir = self.unbundlePath(bundle)
        # every unpacked path has to start with this prefix
        prefix = os.path.abspath(unbundleDir) + os.sep

        archive = zipfile.ZipFile(io.BytesIO(bundle.getZip()), "r")
        try:
            corrupt = archive.testzip()
            if corrupt is not None:
                raise zipfile.BadZipfile(
                    "Bundle %s has corrupt member %s" % (
                        bundle.name, corrupt))

            filepaths = archive.namelist()
            for filepath in filepaths:
                path = os.path.join(unbundleDir, filepath)
                if not os.path.abspath(path).startswith(prefix):
                    raise zipfile.BadZipfile(
                        "Bundle %s has unsafe member name %s" % (
                            bundle.name, filepath))

            for filepath in filepaths:
                path = os.path.join(unbundleDir, filepath)
                if filepath.endswith('/'):
                    # a directory entry: there is no file data to write
                    self._ensureDirectory(path)
                    continue
                self._ensureDirectory(os.path.split(path)[0])
                self._writeAtomically(path, archive.read(filepath))
        finally:
            archive.close()
        return unbundleDir

    def _ensureDirectory(self, path):
        # create path and its parents; an already existing directory is fine
        try:
            os.makedirs(path)
        except OSError as err:
            # Reraise error unless if it's an already existing
            if err.errno != errno.EEXIST or not os.path.isdir(path):
                raise

    def _writeAtomically(self, path, data):
        # atomically write to path, see #373: write a temporary file in the
        # same directory and rename it over the final name
        fd, tempname = tempfile.mkstemp(dir=os.path.split(path)[0])
        try:
            handle = os.fdopen(fd, 'wb')
            try:
                handle.write(data)
            finally:
                handle.close()
            os.rename(tempname, path)
        finally:
            # only still there if the write or the rename failed
            if os.path.exists(tempname):
                try:
                    os.unlink(tempname)
                except OSError:
                    pass
175
class Bundler:
    """
    I bundle files into a bundle so they can be cached remotely easily.
    """

    def __init__(self, name):
        """
        Create a new bundle.
        """
        self._files = {} # dictionary of BundledFile's indexed on path
        self.name = name
        self._bundle = Bundle(name)

    def add(self, source, destination = None):
        """
        Add files to the bundle.

        @param source:      the path to the file to add to the bundle.
        @param destination: a relative path to store this file in in the
                            bundle. If unspecified, this will be stored in
                            the top level.

        @returns: the path the file got stored as
        """
        if destination is None:
            destination = os.path.split(source)[1]
        self._files[source] = BundledFile(source, destination)
        return destination

    def bundle(self):
        """
        Bundle the files registered with the bundler.

        The zip is (re)built when there is none yet, or when one of the
        registered files reports a change.

        @rtype: L{flumotion.common.bundle.Bundle}
        """
        # rescan files registered in the bundle, and check if we need to
        # rebuild the internal zip; one changed file is enough, so stop
        # checking at the first one
        if not self._bundle.getZip() or any(
            bundledFile.hasChanged()
                for bundledFile in self._files.values()):
            self._bundle.setZip(self._buildzip())

        return self._bundle

    def _buildzip(self):
        """
        Build the zip file containing the files registered in the bundle.

        @returns: the zip file data
        """
        filelike = io.BytesIO()
        archive = zipfile.ZipFile(filelike, "w")
        try:
            for bundledFile in self._files.values():
                bundledFile.pack(archive)
        finally:
            # closing writes the zip's central directory, and releases the
            # ZipFile even when packing a file failed
            archive.close()
        data = filelike.getvalue()
        filelike.close()
        return data
237
class BundlerBasket:
    """
    I manage bundlers that are registered through me.
    """

    def __init__(self):
        """
        Create a new bundler basket.
        """
        self._bundlers = {} # bundler name -> bundle

        self._files = {}    # filename -> bundle name
        self._imports = {}  # import statements -> bundle name

        self._graph = dag.DAG()

    def add(self, bundleName, source, destination = None):
        """
        Add files to the bundler basket for the given bundle.

        All checks are done before anything is registered, so a rejected
        file leaves neither the basket nor the bundler changed.

        @param bundleName:  the name of the bundle this file is a part of
        @param source:      the path to the file to add to the bundle
        @param destination: a relative path to store this file in in the
                            bundle. If unspecified, this will be stored in
                            the top level
        @raises Exception: if the location or the import it provides is
                           already registered
        """
        # same default as Bundler.add(): store under the file's base name
        if destination is None:
            location = os.path.split(source)[1]
        else:
            location = destination

        if location in self._files:
            raise Exception("Cannot add %s to bundle %s, already in %s" % (
                location, bundleName, self._files[location]))

        package = self._importForLocation(location)
        if package is not None and package in self._imports:
            # name the bundle that already owns the import
            raise Exception("Bundler %s already has import %s" % (
                self._imports[package], package))

        # get the bundler and create it if need be
        if bundleName not in self._bundlers:
            self._bundlers[bundleName] = Bundler(bundleName)
        bundler = self._bundlers[bundleName]

        # add the file to the bundle and register
        bundler.add(source, location)
        self._files[location] = bundleName
        if package is not None:
            self._imports[package] = bundleName

    def _importForLocation(self, location):
        # Return the dotted import a .py/.pyc location provides, or None
        # if the location is not a python file.
        if location.endswith('.py'):
            package = location[:-3]
        elif location.endswith('.pyc'):
            package = location[:-4]
        else:
            return None
        if not package:
            return None

        # a package's __init__ is imported under the package's own name;
        # only strip a last component that is exactly __init__
        head, tail = os.path.split(package)
        if tail == '__init__':
            package = head

        return ".".join(package.split('/')) # win32 fixme

    def depend(self, depender, *dependencies):
        """
        Make the given bundle depend on the other given bundles.

        @type depender:     string
        @type dependencies: list of strings
        """
        # note that a bundler doesn't necessarily need to be registered yet
        if not self._graph.hasNode(depender):
            self._graph.addNode(depender)
        for dep in dependencies:
            if not self._graph.hasNode(dep):
                self._graph.addNode(dep)
            self._graph.addEdge(depender, dep)

    def getDependencies(self, bundlerName):
        """
        Return names of all the dependencies of this bundle, including this
        bundle itself.
        The dependencies are returned in a correct depending order.

        @raises errors.NoBundleError: if no bundler is registered under
                                      the given name
        """
        if bundlerName not in self._bundlers:
            raise errors.NoBundleError('Unknown bundle %s' % bundlerName)
        if not self._graph.hasNode(bundlerName):
            # never passed to depend(), so it only depends on itself
            return [bundlerName]
        return [bundlerName] + self._graph.getOffspring(bundlerName)

    def getBundlerByName(self, bundlerName):
        """
        Return the bundle by name, or None if not found.
        """
        return self._bundlers.get(bundlerName)

    def getBundlerNameByImport(self, importString):
        """
        Return the bundler name by import statement, or None if not found.
        """
        return self._imports.get(importString)

    def getBundlerNameByFile(self, filename):
        """
        Return the bundler name by filename, or None if not found.
        """
        return self._files.get(filename)
344
class MergedBundler(Bundler):
    """
    I am a bundler that can absorb other bundlers as well as loose files.

    Calling bundle() on me gives a single bundle holding the files of
    every bundler added to me, together with the files added to me
    directly.
    """

    def __init__(self, name='merged-bundle'):
        Bundler.__init__(self, name)
        self._subbundlers = {} # bundler name -> Bundler

    def addBundler(self, bundler):
        """
        Add to me all of the files managed by another bundler.

        A bundler whose name was added before is ignored.

        @param bundler: The bundler whose files you want in this
                        bundler.
        @type  bundler: L{Bundler}
        """
        if bundler.name in self._subbundlers:
            return

        self._subbundlers[bundler.name] = bundler
        for bundled in bundler._files.values():
            self.add(bundled.source, bundled.destination)

    def getSubBundlers(self):
        """
        @returns: A list of all of the bundlers that have been added to
                  me.
        """
        return self._subbundlers.values()
376
def makeBundleFromLoadedModules(outfile, outreg, *prefixes):
    """
    Make a bundle from a subset of all loaded modules, also writing out
    a registry file that can apply to that subset of the global
    registry. Suitable for use as a FLU_ATEXIT handler.

    @param outfile:  The path to which a zip file will be written.
    @type  outfile:  str
    @param outreg:   The path to which a registry file will be written.
    @type  outreg:   str
    @param prefixes: A list of prefixes to which to limit the export. If
                     not given, package up all modules. For example,
                     "flumotion" would limit the output to modules that
                     start with "flumotion".
    @type  prefixes: list of str
    """
    from flumotion.common import registry, log
    from twisted.python import reflect

    def getUsedModules(prefixes):
        # map module name -> module for every loaded module that matches
        # one of the prefixes and was loaded from a file
        ret = {}
        # Iterate over a snapshot of the names: resolving a module below
        # may presumably import, which would change sys.modules while we
        # are looping over it.
        for modname in list(sys.modules):
            if prefixes and not any(
                modname.startswith(prefix) for prefix in prefixes):
                continue
            try:
                module = reflect.namedModule(modname)
                if hasattr(module, '__file__'):
                    ret[modname] = module
                else:
                    log.info('makebundle', 'Module %s has no file', module)
            except ImportError:
                log.info('makebundle', 'Could not import %s', modname)
        return ret

    def calculateModuleBundleMap():
        # map module name -> registry bundle for every python file listed
        # in a bundle in the registry
        allbundles = registry.getRegistry().getBundles()
        ret = {}
        for bundle in allbundles:
            for directory in bundle.getDirectories():
                for file in directory.getFiles():
                    path = os.path.join(directory.getName(),
                                        file.getLocation())
                    parts = path.split(os.path.sep)
                    if parts[-1].startswith('__init__.py'):
                        # a package is imported under its directory name
                        parts.pop()
                    elif parts[-1].endswith('.py'):
                        parts[-1] = parts[-1][:-3]
                    else:
                        # not a bundled module
                        continue
                    modname = '.'.join(parts)
                    ret[modname] = bundle
        return ret

    def makeMergedBundler(modules, modulebundlemap):
        # collect every used module in one bundler: modules known to the
        # registry bring in their bundle and its dependencies, all other
        # modules are added as loose files
        ret = MergedBundler()
        basket = registry.getRegistry().makeBundlerBasket()
        for modname in modules:
            modfilename = modules[modname].__file__
            if modname in modulebundlemap:
                bundleName = modulebundlemap[modname].getName()
                for depBundleName in basket.getDependencies(bundleName):
                    ret.addBundler(basket.getBundlerByName(depBundleName))
            else:
                if modfilename.endswith('.pyc'):
                    # bundle the .py file next to the compiled one
                    modfilename = modfilename[:-1]
                if os.path.isdir(modfilename):
                    with_init = os.path.join(modfilename, '__init__.py')
                    if os.path.exists(with_init):
                        modfilename = with_init
                # keep as many trailing path components as the module
                # name has parts, plus one for a package's __init__ file
                nparts = len(modname.split('.'))
                if '__init__' in modfilename:
                    nparts += 1
                relpath = os.path.join(
                    *modfilename.split(os.path.sep)[-nparts:])
                ret.add(modfilename, relpath)
        return ret

    modules = getUsedModules(prefixes)
    modulebundlemap = calculateModuleBundleMap()
    bundler = makeMergedBundler(modules, modulebundlemap)

    print('Writing bundle to %s' % outfile)
    # the zip is binary data; write it as such and close the file
    with open(outfile, 'wb') as handle:
        handle.write(bundler.bundle().getZip())

    print('Writing registry to %s' % outreg)
    bundlers_used = [b.name for b in bundler.getSubBundlers()]
    regwriter = registry.RegistrySubsetWriter(onlyBundles=bundlers_used)
    with open(outreg, 'w') as handle:
        regwriter.dump(handle)