Merge pull request #14 from reinhard-mueller/clean-setup
Clean up initial blacklist download on install
This commit is contained in:
commit
79e59ee4b7
|
@ -12,12 +12,11 @@ python:
|
||||||
install:
|
install:
|
||||||
- python -m pip install -U pip wheel setuptools
|
- python -m pip install -U pip wheel setuptools
|
||||||
- python -m pip install -U isort flake8
|
- python -m pip install -U isort flake8
|
||||||
- python -m pip install -r requirements.txt
|
- python -Wd setup.py sdist -v
|
||||||
- python -Wd setup.py sdist
|
- python -Wd -m pip install -v dist/py3-validate-email-*.tar.gz
|
||||||
- python -Wd -m pip install dist/py3-validate-email-*.tar.gz
|
|
||||||
|
|
||||||
# command to run tests
|
# command to run tests
|
||||||
script:
|
script:
|
||||||
- python -m isort -c --skip-glob=venv
|
- python -m isort -c --skip-glob=venv
|
||||||
- python -m flake8 tests/ validate_email/ setup.py
|
- python -m flake8 tests/ validate_email/ setup.py
|
||||||
- python -m unittest discover -v
|
- python -m unittest discover -v -s tests
|
||||||
|
|
|
@ -2,7 +2,6 @@ include AUTHORS
|
||||||
include LICENSE
|
include LICENSE
|
||||||
include README.rst
|
include README.rst
|
||||||
include CHANGELOG.txt
|
include CHANGELOG.txt
|
||||||
recursive-include validate_email *
|
|
||||||
recursive-exclude tests *
|
recursive-exclude tests *
|
||||||
recursive-exclude * __pycache__
|
recursive-exclude * __pycache__
|
||||||
recursive-exclude * *.pyc
|
recursive-exclude * *.pyc
|
||||||
|
|
|
@ -1,3 +0,0 @@
|
||||||
dnspython==1.16.0
|
|
||||||
idna==2.8
|
|
||||||
filelock>=3.0.12
|
|
108
setup.py
108
setup.py
|
@ -1,106 +1,72 @@
|
||||||
|
import sys
|
||||||
|
from distutils import log
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from shutil import move, rmtree
|
|
||||||
from subprocess import check_call
|
|
||||||
from tempfile import mkdtemp
|
|
||||||
|
|
||||||
from setuptools import find_packages, setup
|
from setuptools import find_packages, setup
|
||||||
from setuptools.command.build_py import build_py
|
from setuptools.command.build_py import build_py
|
||||||
from setuptools.command.develop import develop
|
from setuptools.command.develop import develop
|
||||||
from setuptools.command.sdist import sdist
|
|
||||||
|
|
||||||
try:
|
|
||||||
# OSX Homebrew fix: https://stackoverflow.com/a/53190037/1067833
|
|
||||||
from sys import _base_executable as executable
|
|
||||||
except ImportError:
|
|
||||||
from sys import executable
|
|
||||||
|
|
||||||
_EGG_REQ_PATH = Path(__file__).parent.joinpath(
|
|
||||||
'py3_validate_email.egg-info', 'requires.txt')
|
|
||||||
_REQ_PATH = Path(__file__).parent.joinpath('requirements.txt')
|
|
||||||
|
|
||||||
with open(_REQ_PATH if _REQ_PATH.exists() else _EGG_REQ_PATH) as fd:
|
|
||||||
_req_content = fd.readlines()
|
|
||||||
_DEPENDENCIES = [x.strip() for x in _req_content if x.strip()]
|
|
||||||
|
|
||||||
with open(Path(__file__).parent.joinpath('README.rst')) as fd:
|
|
||||||
_LONG_DESC = fd.read()
|
|
||||||
|
|
||||||
|
|
||||||
def run_initial_updater():
|
def run_initial_updater(path: Path):
|
||||||
'Download an initial blacklist.txt on install time.'
|
'Download an initial blacklist.txt on install time.'
|
||||||
# Install dependencies so the initial update can run
|
# Only import the updater module to avoid requiring all the dependencies
|
||||||
check_call([executable, '-m', 'pip', 'install'] + _DEPENDENCIES)
|
# and auto-running the updater.
|
||||||
# The updater will walk code stack frames and see if this
|
sys.path.append(str(path.joinpath('validate_email')))
|
||||||
# variable is set in locals() to determine if it is run from the
|
orig_dont_write_bytecode = sys.dont_write_bytecode
|
||||||
# setup, in which case it won't autoupdate.
|
sys.dont_write_bytecode = True
|
||||||
_IS_VALIDATEEMAIL_SETUP = True
|
try:
|
||||||
from validate_email.updater import BlacklistUpdater, LIB_PATH_DEFAULT
|
from updater import BLACKLIST_FILEPATH_INSTALLED, BlacklistUpdater
|
||||||
LIB_PATH_DEFAULT.mkdir(exist_ok=True)
|
log.info(f'downloading blacklist to {BLACKLIST_FILEPATH_INSTALLED}')
|
||||||
blacklist_updater = BlacklistUpdater()
|
BlacklistUpdater()._install()
|
||||||
blacklist_updater._is_install_time = _IS_VALIDATEEMAIL_SETUP
|
finally:
|
||||||
blacklist_updater.process(force=True)
|
sys.path = sys.path[:-1]
|
||||||
|
sys.dont_write_bytecode = orig_dont_write_bytecode
|
||||||
|
|
||||||
|
|
||||||
class DevelopCommand(develop):
|
class DevelopCommand(develop):
|
||||||
'Develop command.'
|
"""
|
||||||
|
Adapted version of the 'develop' command.
|
||||||
|
|
||||||
|
After finishing the usual build run, download the blacklist and
|
||||||
|
store it into the source directory, because that is from where the
|
||||||
|
library will run in a developer install.
|
||||||
|
"""
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
if self.dry_run:
|
|
||||||
return super().run()
|
|
||||||
run_initial_updater()
|
|
||||||
super().run()
|
super().run()
|
||||||
|
if not self.dry_run:
|
||||||
|
run_initial_updater(Path(__file__).parent)
|
||||||
class SdistCommand(sdist):
|
|
||||||
'Sdist command.'
|
|
||||||
|
|
||||||
def run(self):
|
|
||||||
"""
|
|
||||||
Manually remove the data directory before creating the
|
|
||||||
distribution package, every install will create it for
|
|
||||||
themselves when installing created the python wheel.
|
|
||||||
`MANIFEST.in` should not remove the data dir since install and
|
|
||||||
develop/install would exclude it!
|
|
||||||
"""
|
|
||||||
if self.dry_run:
|
|
||||||
return super().run()
|
|
||||||
tempdir = Path(mkdtemp()).joinpath('data')
|
|
||||||
data_dir = Path(
|
|
||||||
__file__).absolute().parent.joinpath('validate_email', 'data')
|
|
||||||
do_move = data_dir.exists()
|
|
||||||
if do_move:
|
|
||||||
move(src=data_dir, dst=tempdir)
|
|
||||||
super().run()
|
|
||||||
if do_move:
|
|
||||||
move(src=tempdir, dst=data_dir)
|
|
||||||
rmtree(path=tempdir.parent)
|
|
||||||
|
|
||||||
|
|
||||||
class BuildPyCommand(build_py):
|
class BuildPyCommand(build_py):
|
||||||
'BuildPy command.'
|
"""
|
||||||
|
Adapted version of the 'build_py' command.
|
||||||
|
|
||||||
|
After finishing the usual build run, download the blacklist and
|
||||||
|
store it into the build directory. A subsequent 'install' step will
|
||||||
|
copy the full contents of the build directory to the install
|
||||||
|
target, thus including the blacklist.
|
||||||
|
"""
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
if self.dry_run:
|
|
||||||
return super().run()
|
|
||||||
run_initial_updater()
|
|
||||||
super().run()
|
super().run()
|
||||||
|
if not self.dry_run:
|
||||||
|
run_initial_updater(Path(self.build_lib))
|
||||||
|
|
||||||
|
|
||||||
setup(
|
setup(
|
||||||
name='py3-validate-email',
|
name='py3-validate-email',
|
||||||
version='0.2.6',
|
version='0.2.6',
|
||||||
packages=find_packages(exclude=['tests']),
|
packages=find_packages(exclude=['tests']),
|
||||||
install_requires=_DEPENDENCIES,
|
install_requires=['dnspython~=1.16', 'idna~=2.8', 'filelock~=3.0'],
|
||||||
author='László Károlyi',
|
author='László Károlyi',
|
||||||
author_email='laszlo@karolyi.hu',
|
author_email='laszlo@karolyi.hu',
|
||||||
include_package_data=True,
|
|
||||||
description=(
|
description=(
|
||||||
'Email validator with regex, blacklisted domains and SMTP checking.'),
|
'Email validator with regex, blacklisted domains and SMTP checking.'),
|
||||||
long_description=_LONG_DESC,
|
long_description=Path(__file__).parent.joinpath('README.rst').read_text(),
|
||||||
long_description_content_type='text/x-rst',
|
long_description_content_type='text/x-rst',
|
||||||
keywords='email validation verification mx verify',
|
keywords='email validation verification mx verify',
|
||||||
url='http://github.com/karolyi/py3-validate-email',
|
url='http://github.com/karolyi/py3-validate-email',
|
||||||
cmdclass=dict(
|
cmdclass=dict(build_py=BuildPyCommand, develop=DevelopCommand),
|
||||||
develop=DevelopCommand, sdist=SdistCommand, build_py=BuildPyCommand),
|
|
||||||
license='LGPL',
|
license='LGPL',
|
||||||
)
|
)
|
||||||
|
|
|
@ -9,10 +9,6 @@ from typing import Callable, Optional
|
||||||
from urllib.error import HTTPError
|
from urllib.error import HTTPError
|
||||||
from urllib.request import Request, urlopen
|
from urllib.request import Request, urlopen
|
||||||
|
|
||||||
from filelock import FileLock
|
|
||||||
|
|
||||||
from .utils import is_setuptime
|
|
||||||
|
|
||||||
LOGGER = getLogger(__name__)
|
LOGGER = getLogger(__name__)
|
||||||
TMP_PATH = Path(gettempdir()).joinpath(
|
TMP_PATH = Path(gettempdir()).joinpath(
|
||||||
f'{gettempprefix()}-py3-validate-email-{geteuid()}')
|
f'{gettempprefix()}-py3-validate-email-{geteuid()}')
|
||||||
|
@ -37,57 +33,30 @@ class BlacklistUpdater(object):
|
||||||
"""
|
"""
|
||||||
|
|
||||||
_refresh_when_older_than: int = 5 * 24 * 60 * 60 # 5 days
|
_refresh_when_older_than: int = 5 * 24 * 60 * 60 # 5 days
|
||||||
_is_install_time: bool = False
|
|
||||||
|
|
||||||
@property
|
|
||||||
def _etag_filepath(self) -> str:
|
|
||||||
'Return the ETag file path to use.'
|
|
||||||
return ETAG_FILEPATH_INSTALLED \
|
|
||||||
if self._is_install_time else ETAG_FILEPATH_TMP
|
|
||||||
|
|
||||||
@property
|
|
||||||
def _blacklist_filepath(self) -> str:
|
|
||||||
'Return the blacklist file path to use.'
|
|
||||||
return BLACKLIST_FILEPATH_INSTALLED \
|
|
||||||
if self._is_install_time else BLACKLIST_FILEPATH_TMP
|
|
||||||
|
|
||||||
def _read_etag(self) -> Optional[str]:
|
def _read_etag(self) -> Optional[str]:
|
||||||
'Read the etag header from the stored etag file when exists.'
|
'Read the etag header from the stored etag file when exists.'
|
||||||
for path in [ETAG_FILEPATH_TMP, ETAG_FILEPATH_INSTALLED]:
|
for path in [ETAG_FILEPATH_TMP, ETAG_FILEPATH_INSTALLED]:
|
||||||
try:
|
try:
|
||||||
with open(path) as fd:
|
return path.read_text().strip()
|
||||||
return fd.read().strip()
|
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def _write_etag(self, content: str):
|
|
||||||
'Write the etag of the newly received file to the cache.'
|
|
||||||
path = self._etag_filepath
|
|
||||||
LOGGER.debug(msg=f'Storing ETag response into {path}.')
|
|
||||||
with open(path, 'w') as fd:
|
|
||||||
fd.write(content)
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _is_old(self) -> bool:
|
def _is_old(self) -> bool:
|
||||||
'Return `True` if the locally stored file is old.'
|
'Return `True` if the locally stored file is old.'
|
||||||
true_when_older_than = time() - self._refresh_when_older_than
|
true_when_older_than = time() - self._refresh_when_older_than
|
||||||
try:
|
for path in [BLACKLIST_FILEPATH_TMP, BLACKLIST_FILEPATH_INSTALLED]:
|
||||||
ctime = BLACKLIST_FILEPATH_TMP.stat().st_ctime
|
try:
|
||||||
if ctime >= true_when_older_than:
|
return path.stat().st_ctime < true_when_older_than
|
||||||
# Downloaded tmp file is still fresh enough
|
except FileNotFoundError:
|
||||||
return False
|
pass
|
||||||
except FileNotFoundError:
|
return True # no file found at all
|
||||||
pass
|
|
||||||
try:
|
|
||||||
ctime = BLACKLIST_FILEPATH_INSTALLED.stat().st_ctime
|
|
||||||
except FileNotFoundError:
|
|
||||||
return True
|
|
||||||
return ctime < true_when_older_than
|
|
||||||
|
|
||||||
def _get_headers(self, force_update: bool = False) -> dict:
|
def _get_headers(self, force_update: bool = False) -> dict:
|
||||||
'Compile a header with etag if available.'
|
'Compile a header with etag if available.'
|
||||||
headers = dict()
|
headers = dict()
|
||||||
if force_update or self._is_install_time:
|
if force_update:
|
||||||
return headers
|
return headers
|
||||||
etag = self._read_etag()
|
etag = self._read_etag()
|
||||||
if not etag:
|
if not etag:
|
||||||
|
@ -95,27 +64,40 @@ class BlacklistUpdater(object):
|
||||||
headers['If-None-Match'] = etag
|
headers['If-None-Match'] = etag
|
||||||
return headers
|
return headers
|
||||||
|
|
||||||
def _write_new_file(self, response: HTTPResponse):
|
def _download(self, headers: dict, blacklist_path: Path, etag_path: Path):
|
||||||
'Write new data file on its arrival.'
|
'Download and store blacklist file.'
|
||||||
|
LOGGER.debug(msg=f'Checking {BLACKLIST_URL}')
|
||||||
|
request = Request(url=BLACKLIST_URL, headers=headers)
|
||||||
|
response = urlopen(url=request) # type: HTTPResponse
|
||||||
|
# New data available
|
||||||
|
LOGGER.debug(msg=f'Writing response into {blacklist_path}')
|
||||||
|
blacklist_path.write_bytes(response.fp.read())
|
||||||
if 'ETag' in response.headers:
|
if 'ETag' in response.headers:
|
||||||
self._write_etag(response.headers.get('ETag'))
|
LOGGER.debug(msg=f'Storing ETag response into {etag_path}.')
|
||||||
path = self._blacklist_filepath
|
etag_path.write_text(response.headers['ETag'])
|
||||||
LOGGER.debug(msg=f'Writing response into {path}')
|
|
||||||
with open(path, 'wb') as fd:
|
def _install(self):
|
||||||
fd.write(response.fp.read())
|
"""
|
||||||
|
Download and store the blacklist file and the matching etag file
|
||||||
|
into the library path. This is executed from setup.py upon
|
||||||
|
installation of the library. Don't call this in your
|
||||||
|
application.
|
||||||
|
"""
|
||||||
|
LIB_PATH_DEFAULT.mkdir(exist_ok=True)
|
||||||
|
self._download(
|
||||||
|
headers={}, blacklist_path=BLACKLIST_FILEPATH_INSTALLED,
|
||||||
|
etag_path=ETAG_FILEPATH_INSTALLED)
|
||||||
|
|
||||||
def _process(self, force: bool = False):
|
def _process(self, force: bool = False):
|
||||||
'Start optionally updating the blacklist.txt file, while locked.'
|
'Start optionally updating the blacklist.txt file, while locked.'
|
||||||
if not force and not self._is_old:
|
if not force and not self._is_old:
|
||||||
LOGGER.debug(msg='Not updating because file is fresh enough.')
|
LOGGER.debug(msg='Not updating because file is fresh enough.')
|
||||||
return
|
return
|
||||||
LOGGER.debug(msg=f'Checking {BLACKLIST_URL}')
|
|
||||||
request = Request(
|
|
||||||
url=BLACKLIST_URL, headers=self._get_headers(force_update=force))
|
|
||||||
try:
|
try:
|
||||||
response = urlopen(url=request) # type: HTTPResponse
|
self._download(
|
||||||
# New data available
|
headers=self._get_headers(force_update=force),
|
||||||
self._write_new_file(response=response)
|
blacklist_path=BLACKLIST_FILEPATH_TMP,
|
||||||
|
etag_path=ETAG_FILEPATH_TMP)
|
||||||
except HTTPError as exc:
|
except HTTPError as exc:
|
||||||
if exc.code == 304:
|
if exc.code == 304:
|
||||||
# Not modified, update date on the tmp file
|
# Not modified, update date on the tmp file
|
||||||
|
@ -128,6 +110,8 @@ class BlacklistUpdater(object):
|
||||||
self, force: bool = False, callback: Optional[Callable] = None):
|
self, force: bool = False, callback: Optional[Callable] = None):
|
||||||
'Start optionally updating the blacklist.txt file.'
|
'Start optionally updating the blacklist.txt file.'
|
||||||
# Locking to avoid multi-process update on multi-process startup
|
# Locking to avoid multi-process update on multi-process startup
|
||||||
|
# Import filelock locally because this module is also used by setup.py
|
||||||
|
from filelock import FileLock
|
||||||
with FileLock(lock_file=LOCK_PATH):
|
with FileLock(lock_file=LOCK_PATH):
|
||||||
self._process(force=force)
|
self._process(force=force)
|
||||||
# Always execute callback because multiple processes can have
|
# Always execute callback because multiple processes can have
|
||||||
|
@ -144,8 +128,6 @@ def update_builtin_blacklist(
|
||||||
Update and reload the built-in blacklist. Return the `Thread` used
|
Update and reload the built-in blacklist. Return the `Thread` used
|
||||||
to do the background update, so it can be `join()`-ed.
|
to do the background update, so it can be `join()`-ed.
|
||||||
"""
|
"""
|
||||||
if is_setuptime():
|
|
||||||
return
|
|
||||||
LOGGER.info(msg='Starting optional update of built-in blacklist.')
|
LOGGER.info(msg='Starting optional update of built-in blacklist.')
|
||||||
blacklist_updater = BlacklistUpdater()
|
blacklist_updater = BlacklistUpdater()
|
||||||
kwargs = dict(force=force, callback=callback)
|
kwargs = dict(force=force, callback=callback)
|
||||||
|
|
|
@ -1,20 +0,0 @@
|
||||||
try:
|
|
||||||
from sys import _getframe
|
|
||||||
getframe = _getframe
|
|
||||||
except ImportError:
|
|
||||||
getframe = None
|
|
||||||
|
|
||||||
from traceback import walk_stack
|
|
||||||
|
|
||||||
|
|
||||||
def is_setuptime() -> bool:
|
|
||||||
'Return `True` if called from setup.'
|
|
||||||
if getframe is None:
|
|
||||||
# This is not CPython, can't know if this is setup time
|
|
||||||
return False
|
|
||||||
for frame, lineno in walk_stack(f=getframe()):
|
|
||||||
# @See setup.py
|
|
||||||
if frame.f_locals and \
|
|
||||||
frame.f_locals.get('_IS_VALIDATEEMAIL_SETUP') is True:
|
|
||||||
return True
|
|
||||||
return False
|
|
Loading…
Reference in New Issue