From: Rob Browning Date: Sun, 17 Oct 2021 17:17:36 +0000 (-0500) Subject: ShaBloom.__del__: replace with context management X-Git-Url: https://arthur.ath.cx/gitweb/?a=commitdiff_plain;h=f2938ade564e0e2b57368da6c5e74cfe5eac4a16;p=bup.git ShaBloom.__del__: replace with context management These changes also just use finally in some cases, instead of the more complex py2 compatible BaseException/with_pending_raise() catch becase I'm leaning in favor of just dropping python 2 support soon. Signed-off-by: Rob Browning Tested-by: Rob Browning --- diff --git a/lib/bup/bloom.py b/lib/bup/bloom.py index 05ebd71..97024b9 100644 --- a/lib/bup/bloom.py +++ b/lib/bup/bloom.py @@ -84,6 +84,7 @@ from __future__ import absolute_import import os, math, struct from bup import _helpers +from bup.compat import pending_raise from bup.helpers import (debug1, debug2, log, mmap_read, mmap_readwrite, mmap_readwrite_private, unlink) @@ -107,12 +108,13 @@ class ShaBloom: """Wrapper which contains data from multiple index files. """ def __init__(self, filename, f=None, readwrite=False, expected=-1): self.name = filename - self.rwfile = None + self.readwrite = readwrite + self.file = None self.map = None assert(filename.endswith(b'.bloom')) if readwrite: assert(expected > 0) - self.rwfile = f = f or open(filename, 'r+b') + self.file = f = f or open(filename, 'r+b') f.seek(0) # Decide if we want to mmap() the pages as writable ('immediate' @@ -135,13 +137,12 @@ class ShaBloom: self.delaywrite = expected > pages debug1('bloom: delaywrite=%r\n' % self.delaywrite) if self.delaywrite: - self.map = mmap_readwrite_private(self.rwfile, close=False) + self.map = mmap_readwrite_private(self.file, close=False) else: - self.map = mmap_readwrite(self.rwfile, close=False) + self.map = mmap_readwrite(self.file, close=False) else: - self.rwfile = None - f = f or open(filename, 'rb') - self.map = mmap_read(f) + self.file = f or open(filename, 'rb') + self.map = mmap_read(self.file) got = self.map[0:4] if got != b'BLOM': log('Warning: invalid BLOM header (%r) in %r\n' % (got, filename)) @@ -167,33 +168,42 @@ class ShaBloom: self.idxnames = [] def _init_failed(self): - if self.map: - self.map = None - if self.rwfile: - self.rwfile.close() - self.rwfile = None self.idxnames = [] self.bits = self.entries = 0 + self.map, tmp_map = None, self.map + self.file, tmp_file = None, self.file + try: + if tmp_map: + tmp_map.close() + finally: # This won't handle pending exceptions correctly in py2 + if self.file: + tmp_file.close() def valid(self): return self.map and self.bits - def __del__(self): - self.close() - def close(self): - if self.map and self.rwfile: - debug2("bloom: closing with %d entries\n" % self.entries) - self.map[12:16] = struct.pack('!I', self.entries) - if self.delaywrite: - self.rwfile.seek(0) - self.rwfile.write(self.map) - else: - self.map.flush() - self.rwfile.seek(16 + 2**self.bits) - if self.idxnames: - self.rwfile.write(b'\0'.join(self.idxnames)) - self._init_failed() + try: + if self.map and self.readwrite: + debug2("bloom: closing with %d entries\n" % self.entries) + self.map[12:16] = struct.pack('!I', self.entries) + if self.delaywrite: + self.file.seek(0) + self.file.write(self.map) + else: + self.map.flush() + self.file.seek(16 + 2**self.bits) + if self.idxnames: + self.file.write(b'\0'.join(self.idxnames)) + finally: # This won't handle pending exceptions correctly in py2 + self._init_failed() + + def __enter__(self): + return self + + def __exit__(self, type, value, traceback): + with pending_raise(value, rethrow=False): + self.close() def pfalse_positive(self, additional=0): n = self.entries + additional diff --git a/lib/bup/cmd/bloom.py b/lib/bup/cmd/bloom.py index 1896f5e..f995964 100755 --- a/lib/bup/cmd/bloom.py +++ b/lib/bup/cmd/bloom.py @@ -28,8 +28,8 @@ def ruin_bloom(bloomfilename): log(path_msg(bloomfilename) + '\n') add_error('bloom: %s not found to ruin\n' % path_msg(rbloomfilename)) return - b = bloom.ShaBloom(bloomfilename, readwrite=True, expected=1) - b.map[16 : 16 + 2**b.bits] = b'\0' * 2**b.bits + with bloom.ShaBloom(bloomfilename, readwrite=True, expected=1) as b: + b.map[16 : 16 + 2**b.bits] = b'\0' * 2**b.bits def check_bloom(path, bloomfilename, idx): @@ -38,21 +38,21 @@ def check_bloom(path, bloomfilename, idx): if not os.path.exists(bloomfilename): log('bloom: %s: does not exist.\n' % path_msg(rbloomfilename)) return - b = bloom.ShaBloom(bloomfilename) - if not b.valid(): - add_error('bloom: %r is invalid.\n' % path_msg(rbloomfilename)) - return - base = os.path.basename(idx) - if base not in b.idxnames: - log('bloom: %s does not contain the idx.\n' % path_msg(rbloomfilename)) - return - if base == idx: - idx = os.path.join(path, idx) - log('bloom: bloom file: %s\n' % path_msg(rbloomfilename)) - log('bloom: checking %s\n' % path_msg(ridx)) - for objsha in git.open_idx(idx): - if not b.exists(objsha): - add_error('bloom: ERROR: object %s missing' % hexstr(objsha)) + with bloom.ShaBloom(bloomfilename) as b: + if not b.valid(): + add_error('bloom: %r is invalid.\n' % path_msg(rbloomfilename)) + return + base = os.path.basename(idx) + if base not in b.idxnames: + log('bloom: %s does not contain the idx.\n' % path_msg(rbloomfilename)) + return + if base == idx: + idx = os.path.join(path, idx) + log('bloom: bloom file: %s\n' % path_msg(rbloomfilename)) + log('bloom: checking %s\n' % path_msg(ridx)) + for objsha in git.open_idx(idx): + if not b.exists(objsha): + add_error('bloom: ERROR: object %s missing' % hexstr(objsha)) _first = None @@ -60,79 +60,86 @@ def do_bloom(path, outfilename, k, force): global _first assert k in (None, 4, 5) b = None - if os.path.exists(outfilename) and not force: - b = bloom.ShaBloom(outfilename) - if not b.valid(): - debug1("bloom: Existing invalid bloom found, regenerating.\n") - b = None - - add = [] - rest = [] - add_count = 0 - rest_count = 0 - for i, name in enumerate(glob.glob(b'%s/*.idx' % path)): - progress('bloom: counting: %d\r' % i) - ix = git.open_idx(name) - ixbase = os.path.basename(name) - if b and (ixbase in b.idxnames): - rest.append(name) - rest_count += len(ix) - else: - add.append(name) - add_count += len(ix) - - if not add: - debug1("bloom: nothing to do.\n") - return - - if b: - if len(b) != rest_count: - debug1("bloom: size %d != idx total %d, regenerating\n" - % (len(b), rest_count)) - b = None - elif k is not None and k != b.k: - debug1("bloom: new k %d != existing k %d, regenerating\n" - % (k, b.k)) - b = None - elif (b.bits < bloom.MAX_BLOOM_BITS[b.k] and - b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE): - debug1("bloom: regenerating: adding %d entries gives " - "%.2f%% false positives.\n" - % (add_count, b.pfalse_positive(add_count))) - b = None - else: - b = bloom.ShaBloom(outfilename, readwrite=True, expected=add_count) - if not b: # Need all idxs to build from scratch - add += rest - add_count += rest_count - del rest - del rest_count - - msg = b is None and 'creating from' or 'adding' - if not _first: _first = path - dirprefix = (_first != path) and git.repo_rel(path) + b': ' or b'' - progress('bloom: %s%s %d file%s (%d object%s).\r' - % (path_msg(dirprefix), msg, - len(add), len(add)!=1 and 's' or '', - add_count, add_count!=1 and 's' or '')) - - tfname = None - if b is None: - tfname = os.path.join(path, b'bup.tmp.bloom') - b = bloom.create(tfname, expected=add_count, k=k) - count = 0 - icount = 0 - for name in add: - ix = git.open_idx(name) - qprogress('bloom: writing %.2f%% (%d/%d objects)\r' - % (icount*100.0/add_count, icount, add_count)) - b.add_idx(ix) - count += 1 - icount += len(ix) - - # Currently, there's an open file object for tfname inside b. - # Make sure it's closed before rename. - b.close() + try: + if os.path.exists(outfilename) and not force: + b = bloom.ShaBloom(outfilename) + if not b.valid(): + debug1("bloom: Existing invalid bloom found, regenerating.\n") + b.close() + b = None + + add = [] + rest = [] + add_count = 0 + rest_count = 0 + for i, name in enumerate(glob.glob(b'%s/*.idx' % path)): + progress('bloom: counting: %d\r' % i) + ix = git.open_idx(name) + ixbase = os.path.basename(name) + if b and (ixbase in b.idxnames): + rest.append(name) + rest_count += len(ix) + else: + add.append(name) + add_count += len(ix) + + if not add: + debug1("bloom: nothing to do.\n") + return + + if b: + if len(b) != rest_count: + debug1("bloom: size %d != idx total %d, regenerating\n" + % (len(b), rest_count)) + b, b_tmp = None, b + b_tmp.close() + elif k is not None and k != b.k: + debug1("bloom: new k %d != existing k %d, regenerating\n" + % (k, b.k)) + b, b_tmp = None, b + b_tmp.close() + elif (b.bits < bloom.MAX_BLOOM_BITS[b.k] and + b.pfalse_positive(add_count) > bloom.MAX_PFALSE_POSITIVE): + debug1("bloom: regenerating: adding %d entries gives " + "%.2f%% false positives.\n" + % (add_count, b.pfalse_positive(add_count))) + b, b_tmp = None, b + b_tmp.close() + else: + b = bloom.ShaBloom(outfilename, readwrite=True, + expected=add_count) + if not b: # Need all idxs to build from scratch + add += rest + add_count += rest_count + del rest + del rest_count + + msg = b is None and 'creating from' or 'adding' + if not _first: _first = path + dirprefix = (_first != path) and git.repo_rel(path) + b': ' or b'' + progress('bloom: %s%s %d file%s (%d object%s).\r' + % (path_msg(dirprefix), msg, + len(add), len(add)!=1 and 's' or '', + add_count, add_count!=1 and 's' or '')) + + tfname = None + if b is None: + tfname = os.path.join(path, b'bup.tmp.bloom') + b = bloom.create(tfname, expected=add_count, k=k) + count = 0 + icount = 0 + for name in add: + ix = git.open_idx(name) + qprogress('bloom: writing %.2f%% (%d/%d objects)\r' + % (icount*100.0/add_count, icount, add_count)) + b.add_idx(ix) + count += 1 + icount += len(ix) + + finally: # This won't handle pending exceptions correctly in py2 + # Currently, there's an open file object for tfname inside b. + # Make sure it's closed before rename. + if b: b.close() if tfname: os.rename(tfname, outfilename) diff --git a/lib/bup/gc.py b/lib/bup/gc.py index e8425ff..06bd3ba 100644 --- a/lib/bup/gc.py +++ b/lib/bup/gc.py @@ -236,7 +236,7 @@ def bup_gc(threshold=10, compression=1, verbosity=0): except MissingObject as ex: log('bup: missing object %r \n' % hexstr(ex.oid)) sys.exit(1) - try: + with live_objects: # FIXME: just rename midxes and bloom, and restore them at the end if # we didn't change any packs? packdir = git.repo(b'objects/pack') @@ -252,5 +252,3 @@ def bup_gc(threshold=10, compression=1, verbosity=0): sweep(live_objects, existing_count, cat_pipe, threshold, compression, verbosity) - finally: - live_objects.close() diff --git a/lib/bup/git.py b/lib/bup/git.py index 4c1a95c..3f1d734 100644 --- a/lib/bup/git.py +++ b/lib/bup/git.py @@ -643,14 +643,22 @@ class PackIdxList: continue d[full] = ix bfull = os.path.join(self.dir, b'bup.bloom') - if self.bloom is None and os.path.exists(bfull): - self.bloom = bloom.ShaBloom(bfull) self.packs = list(set(d.values())) self.packs.sort(reverse=True, key=lambda x: len(x)) - if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self): - self.do_bloom = True - else: - self.bloom = None + if self.bloom is None and os.path.exists(bfull): + self.bloom = bloom.ShaBloom(bfull) + try: + if self.bloom and self.bloom.valid() and len(self.bloom) >= len(self): + self.do_bloom = True + else: + if self.bloom: + self.bloom, bloom_tmp = None, self.bloom + bloom_tmp.close() + except BaseException as ex: + with pending_raise(ex): + if self.bloom: + self.bloom.close() + debug1('PackIdxList: using %d index%s.\n' % (len(self.packs), len(self.packs)!=1 and 'es' or '')) diff --git a/test/int/test_bloom.py b/test/int/test_bloom.py index 5f75d7e..4169227 100644 --- a/test/int/test_bloom.py +++ b/test/int/test_bloom.py @@ -14,26 +14,25 @@ def test_bloom(tmpdir): ix.name = b'dummy.idx' ix.shatable = b''.join(hashes) for k in (4, 5): - b = bloom.create(tmpdir + b'/pybuptest.bloom', expected=100, k=k) - b.add_idx(ix) - assert b.pfalse_positive() < .1 - b.close() - b = bloom.ShaBloom(tmpdir + b'/pybuptest.bloom') - all_present = True - for h in hashes: - all_present &= (b.exists(h) or False) - assert all_present - false_positives = 0 - for h in [os.urandom(20) for i in range(1000)]: - if b.exists(h): - false_positives += 1 - assert false_positives < 5 + with bloom.create(tmpdir + b'/pybuptest.bloom', expected=100, k=k) as b: + b.add_idx(ix) + assert b.pfalse_positive() < .1 + with bloom.ShaBloom(tmpdir + b'/pybuptest.bloom') as b: + all_present = True + for h in hashes: + all_present &= (b.exists(h) or False) + assert all_present + false_positives = 0 + for h in [os.urandom(20) for i in range(1000)]: + if b.exists(h): + false_positives += 1 + assert false_positives < 5 os.unlink(tmpdir + b'/pybuptest.bloom') tf = tempfile.TemporaryFile(dir=tmpdir) - b = bloom.create(b'bup.bloom', f=tf, expected=100) - assert b.rwfile == tf - assert b.k == 5 + with bloom.create(b'bup.bloom', f=tf, expected=100) as b: + assert b.file == tf + assert b.k == 5 # Test large (~1GiB) filter. This may fail on s390 (31-bit # architecture), and anywhere else where the address space is @@ -41,9 +40,9 @@ def test_bloom(tmpdir): tf = tempfile.TemporaryFile(dir=tmpdir) skip_test = False try: - b = bloom.create(b'bup.bloom', f=tf, expected=2**28, - delaywrite=False) - assert b.k == 4 + with bloom.create(b'bup.bloom', f=tf, expected=2**28, + delaywrite=False) as b: + assert b.k == 4 except EnvironmentError as ex: (ptr_width, linkage) = platform.architecture() if ptr_width == '32bit' and ex.errno == errno.ENOMEM: