Lock the blacklist while reading it, to keep it consistent across multiple processes

This commit is contained in:
László Károlyi 2020-04-11 21:59:49 +02:00
parent dfd1d68c40
commit 6790ded356
Signed by: karolyi
GPG Key ID: 2DCAF25E55735BFE
2 changed files with 18 additions and 14 deletions

View File

@ -1,9 +1,11 @@
from logging import getLogger from logging import getLogger
from typing import Optional from typing import Optional
from filelock import FileLock
from .exceptions import DomainBlacklistedError from .exceptions import DomainBlacklistedError
from .updater import ( from .updater import (
BLACKLIST_FILEPATH_INSTALLED, BLACKLIST_FILEPATH_TMP, BLACKLIST_FILEPATH_INSTALLED, BLACKLIST_FILEPATH_TMP, LOCK_PATH,
update_builtin_blacklist) update_builtin_blacklist)
SetOrNone = Optional[set] SetOrNone = Optional[set]
@ -30,8 +32,8 @@ class DomainListValidator(object):
def _blacklist_path(self) -> str: def _blacklist_path(self) -> str:
'Return the path of the `blacklist.txt` that should be loaded.' 'Return the path of the `blacklist.txt` that should be loaded.'
try: try:
# Zero size, file is touched to indicate the # Zero size: file is touched to indicate the preinstalled
# preinstalled file is still fresh enough # file is still fresh enough
return BLACKLIST_FILEPATH_INSTALLED \ return BLACKLIST_FILEPATH_INSTALLED \
if BLACKLIST_FILEPATH_TMP.stat().st_size == 0 \ if BLACKLIST_FILEPATH_TMP.stat().st_size == 0 \
else BLACKLIST_FILEPATH_TMP else BLACKLIST_FILEPATH_TMP
@ -42,13 +44,14 @@ class DomainListValidator(object):
'(Re)load our built-in blacklist.' '(Re)load our built-in blacklist.'
if not self._is_builtin_bl_used: if not self._is_builtin_bl_used:
return return
bl_path = self._blacklist_path with FileLock(lock_file=LOCK_PATH):
LOGGER.debug(msg=f'(Re)loading blacklist: {bl_path}') bl_path = self._blacklist_path
try: LOGGER.debug(msg=f'(Re)loading blacklist from {bl_path}')
with open(bl_path) as fd: try:
lines = fd.readlines() with open(bl_path) as fd:
except FileNotFoundError: lines = fd.readlines()
return except FileNotFoundError:
return
self.domain_blacklist = set( self.domain_blacklist = set(
x.strip().lower() for x in lines if x.strip()) x.strip().lower() for x in lines if x.strip())

View File

@ -35,7 +35,6 @@ class BlacklistUpdater(object):
""" """
_refresh_when_older_than: int = 5 * 24 * 60 * 60 # 5 days _refresh_when_older_than: int = 5 * 24 * 60 * 60 # 5 days
_on_update_callback: Callable = None
_is_install_time: bool = False _is_install_time: bool = False
@property @property
@ -122,16 +121,18 @@ class BlacklistUpdater(object):
BLACKLIST_FILEPATH_TMP.touch() BLACKLIST_FILEPATH_TMP.touch()
return return
raise raise
if self._on_update_callback:
self._on_update_callback()
def process( def process(
self, force: bool = False, callback: Optional[Callable] = None): self, force: bool = False, callback: Optional[Callable] = None):
'Start optionally updating the blacklist.txt file.' 'Start optionally updating the blacklist.txt file.'
# Locking to avoid multi-process update on multi-process startup # Locking to avoid multi-process update on multi-process startup
self._on_update_callback = callback
with FileLock(lock_file=LOCK_PATH): with FileLock(lock_file=LOCK_PATH):
self._process(force=force) self._process(force=force)
# Always execute callback because multiple processes can have
# different versions of blacklists (one before, one after
# updating)
if callback:
callback()
def update_builtin_blacklist( def update_builtin_blacklist(