Merge pull request #14 from reinhard-mueller/clean-setup

Clean up initial blacklist download on install
This commit is contained in:
László Károlyi 2020-04-14 12:24:30 +02:00 committed by GitHub
commit 79e59ee4b7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 76 additions and 153 deletions

View File

@ -12,12 +12,11 @@ python:
install: install:
- python -m pip install -U pip wheel setuptools - python -m pip install -U pip wheel setuptools
- python -m pip install -U isort flake8 - python -m pip install -U isort flake8
- python -m pip install -r requirements.txt - python -Wd setup.py sdist -v
- python -Wd setup.py sdist - python -Wd -m pip install -v dist/py3-validate-email-*.tar.gz
- python -Wd -m pip install dist/py3-validate-email-*.tar.gz
# command to run tests # command to run tests
script: script:
- python -m isort -c --skip-glob=venv - python -m isort -c --skip-glob=venv
- python -m flake8 tests/ validate_email/ setup.py - python -m flake8 tests/ validate_email/ setup.py
- python -m unittest discover -v - python -m unittest discover -v -s tests

View File

@ -2,7 +2,6 @@ include AUTHORS
include LICENSE include LICENSE
include README.rst include README.rst
include CHANGELOG.txt include CHANGELOG.txt
recursive-include validate_email *
recursive-exclude tests * recursive-exclude tests *
recursive-exclude * __pycache__ recursive-exclude * __pycache__
recursive-exclude * *.pyc recursive-exclude * *.pyc

View File

@ -1,3 +0,0 @@
dnspython==1.16.0
idna==2.8
filelock>=3.0.12

108
setup.py
View File

@ -1,106 +1,72 @@
import sys
from distutils import log
from pathlib import Path from pathlib import Path
from shutil import move, rmtree
from subprocess import check_call
from tempfile import mkdtemp
from setuptools import find_packages, setup from setuptools import find_packages, setup
from setuptools.command.build_py import build_py from setuptools.command.build_py import build_py
from setuptools.command.develop import develop from setuptools.command.develop import develop
from setuptools.command.sdist import sdist
try:
# OSX Homebrew fix: https://stackoverflow.com/a/53190037/1067833
from sys import _base_executable as executable
except ImportError:
from sys import executable
_EGG_REQ_PATH = Path(__file__).parent.joinpath(
'py3_validate_email.egg-info', 'requires.txt')
_REQ_PATH = Path(__file__).parent.joinpath('requirements.txt')
with open(_REQ_PATH if _REQ_PATH.exists() else _EGG_REQ_PATH) as fd:
_req_content = fd.readlines()
_DEPENDENCIES = [x.strip() for x in _req_content if x.strip()]
with open(Path(__file__).parent.joinpath('README.rst')) as fd:
_LONG_DESC = fd.read()
def run_initial_updater(): def run_initial_updater(path: Path):
'Download an initial blacklist.txt on install time.' 'Download an initial blacklist.txt on install time.'
# Install dependencies so the initial update can run # Only import the updater module to avoid requiring all the dependencies
check_call([executable, '-m', 'pip', 'install'] + _DEPENDENCIES) # and auto-running the updater.
# The updater will walk code stack frames and see if this sys.path.append(str(path.joinpath('validate_email')))
# variable is set in locals() to determine if it is run from the orig_dont_write_bytecode = sys.dont_write_bytecode
# setup, in which case it won't autoupdate. sys.dont_write_bytecode = True
_IS_VALIDATEEMAIL_SETUP = True try:
from validate_email.updater import BlacklistUpdater, LIB_PATH_DEFAULT from updater import BLACKLIST_FILEPATH_INSTALLED, BlacklistUpdater
LIB_PATH_DEFAULT.mkdir(exist_ok=True) log.info(f'downloading blacklist to {BLACKLIST_FILEPATH_INSTALLED}')
blacklist_updater = BlacklistUpdater() BlacklistUpdater()._install()
blacklist_updater._is_install_time = _IS_VALIDATEEMAIL_SETUP finally:
blacklist_updater.process(force=True) sys.path = sys.path[:-1]
sys.dont_write_bytecode = orig_dont_write_bytecode
class DevelopCommand(develop): class DevelopCommand(develop):
'Develop command.' """
Adapted version of the 'develop' command.
After finishing the usual build run, download the blacklist and
store it into the source directory, because that is from where the
library will run in a developer install.
"""
def run(self): def run(self):
if self.dry_run:
return super().run()
run_initial_updater()
super().run() super().run()
if not self.dry_run:
run_initial_updater(Path(__file__).parent)
class SdistCommand(sdist):
'Sdist command.'
def run(self):
"""
Manually remove the data directory before creating the
distribution package; every install will create it for
itself when installing the created python wheel.
`MANIFEST.in` should not remove the data dir since install and
develop/install would exclude it!
"""
if self.dry_run:
return super().run()
tempdir = Path(mkdtemp()).joinpath('data')
data_dir = Path(
__file__).absolute().parent.joinpath('validate_email', 'data')
do_move = data_dir.exists()
if do_move:
move(src=data_dir, dst=tempdir)
super().run()
if do_move:
move(src=tempdir, dst=data_dir)
rmtree(path=tempdir.parent)
class BuildPyCommand(build_py): class BuildPyCommand(build_py):
'BuildPy command.' """
Adapted version of the 'build_py' command.
After finishing the usual build run, download the blacklist and
store it into the build directory. A subsequent 'install' step will
copy the full contents of the build directory to the install
target, thus including the blacklist.
"""
def run(self): def run(self):
if self.dry_run:
return super().run()
run_initial_updater()
super().run() super().run()
if not self.dry_run:
run_initial_updater(Path(self.build_lib))
setup( setup(
name='py3-validate-email', name='py3-validate-email',
version='0.2.6', version='0.2.6',
packages=find_packages(exclude=['tests']), packages=find_packages(exclude=['tests']),
install_requires=_DEPENDENCIES, install_requires=['dnspython~=1.16', 'idna~=2.8', 'filelock~=3.0'],
author='László Károlyi', author='László Károlyi',
author_email='laszlo@karolyi.hu', author_email='laszlo@karolyi.hu',
include_package_data=True,
description=( description=(
'Email validator with regex, blacklisted domains and SMTP checking.'), 'Email validator with regex, blacklisted domains and SMTP checking.'),
long_description=_LONG_DESC, long_description=Path(__file__).parent.joinpath('README.rst').read_text(),
long_description_content_type='text/x-rst', long_description_content_type='text/x-rst',
keywords='email validation verification mx verify', keywords='email validation verification mx verify',
url='http://github.com/karolyi/py3-validate-email', url='http://github.com/karolyi/py3-validate-email',
cmdclass=dict( cmdclass=dict(build_py=BuildPyCommand, develop=DevelopCommand),
develop=DevelopCommand, sdist=SdistCommand, build_py=BuildPyCommand),
license='LGPL', license='LGPL',
) )

View File

@ -9,10 +9,6 @@ from typing import Callable, Optional
from urllib.error import HTTPError from urllib.error import HTTPError
from urllib.request import Request, urlopen from urllib.request import Request, urlopen
from filelock import FileLock
from .utils import is_setuptime
LOGGER = getLogger(__name__) LOGGER = getLogger(__name__)
TMP_PATH = Path(gettempdir()).joinpath( TMP_PATH = Path(gettempdir()).joinpath(
f'{gettempprefix()}-py3-validate-email-{geteuid()}') f'{gettempprefix()}-py3-validate-email-{geteuid()}')
@ -37,57 +33,30 @@ class BlacklistUpdater(object):
""" """
_refresh_when_older_than: int = 5 * 24 * 60 * 60 # 5 days _refresh_when_older_than: int = 5 * 24 * 60 * 60 # 5 days
_is_install_time: bool = False
@property
def _etag_filepath(self) -> str:
'Return the ETag file path to use.'
return ETAG_FILEPATH_INSTALLED \
if self._is_install_time else ETAG_FILEPATH_TMP
@property
def _blacklist_filepath(self) -> str:
'Return the blacklist file path to use.'
return BLACKLIST_FILEPATH_INSTALLED \
if self._is_install_time else BLACKLIST_FILEPATH_TMP
def _read_etag(self) -> Optional[str]: def _read_etag(self) -> Optional[str]:
'Read the etag header from the stored etag file when exists.' 'Read the etag header from the stored etag file when exists.'
for path in [ETAG_FILEPATH_TMP, ETAG_FILEPATH_INSTALLED]: for path in [ETAG_FILEPATH_TMP, ETAG_FILEPATH_INSTALLED]:
try: try:
with open(path) as fd: return path.read_text().strip()
return fd.read().strip()
except FileNotFoundError: except FileNotFoundError:
pass pass
def _write_etag(self, content: str):
'Write the etag of the newly received file to the cache.'
path = self._etag_filepath
LOGGER.debug(msg=f'Storing ETag response into {path}.')
with open(path, 'w') as fd:
fd.write(content)
@property @property
def _is_old(self) -> bool: def _is_old(self) -> bool:
'Return `True` if the locally stored file is old.' 'Return `True` if the locally stored file is old.'
true_when_older_than = time() - self._refresh_when_older_than true_when_older_than = time() - self._refresh_when_older_than
try: for path in [BLACKLIST_FILEPATH_TMP, BLACKLIST_FILEPATH_INSTALLED]:
ctime = BLACKLIST_FILEPATH_TMP.stat().st_ctime try:
if ctime >= true_when_older_than: return path.stat().st_ctime < true_when_older_than
# Downloaded tmp file is still fresh enough except FileNotFoundError:
return False pass
except FileNotFoundError: return True # no file found at all
pass
try:
ctime = BLACKLIST_FILEPATH_INSTALLED.stat().st_ctime
except FileNotFoundError:
return True
return ctime < true_when_older_than
def _get_headers(self, force_update: bool = False) -> dict: def _get_headers(self, force_update: bool = False) -> dict:
'Compile a header with etag if available.' 'Compile a header with etag if available.'
headers = dict() headers = dict()
if force_update or self._is_install_time: if force_update:
return headers return headers
etag = self._read_etag() etag = self._read_etag()
if not etag: if not etag:
@ -95,27 +64,40 @@ class BlacklistUpdater(object):
headers['If-None-Match'] = etag headers['If-None-Match'] = etag
return headers return headers
def _write_new_file(self, response: HTTPResponse): def _download(self, headers: dict, blacklist_path: Path, etag_path: Path):
'Write new data file on its arrival.' 'Download and store blacklist file.'
LOGGER.debug(msg=f'Checking {BLACKLIST_URL}')
request = Request(url=BLACKLIST_URL, headers=headers)
response = urlopen(url=request) # type: HTTPResponse
# New data available
LOGGER.debug(msg=f'Writing response into {blacklist_path}')
blacklist_path.write_bytes(response.fp.read())
if 'ETag' in response.headers: if 'ETag' in response.headers:
self._write_etag(response.headers.get('ETag')) LOGGER.debug(msg=f'Storing ETag response into {etag_path}.')
path = self._blacklist_filepath etag_path.write_text(response.headers['ETag'])
LOGGER.debug(msg=f'Writing response into {path}')
with open(path, 'wb') as fd: def _install(self):
fd.write(response.fp.read()) """
Download and store the blacklist file and the matching etag file
into the library path. This is executed from setup.py upon
installation of the library. Don't call this in your
application.
"""
LIB_PATH_DEFAULT.mkdir(exist_ok=True)
self._download(
headers={}, blacklist_path=BLACKLIST_FILEPATH_INSTALLED,
etag_path=ETAG_FILEPATH_INSTALLED)
def _process(self, force: bool = False): def _process(self, force: bool = False):
'Start optionally updating the blacklist.txt file, while locked.' 'Start optionally updating the blacklist.txt file, while locked.'
if not force and not self._is_old: if not force and not self._is_old:
LOGGER.debug(msg='Not updating because file is fresh enough.') LOGGER.debug(msg='Not updating because file is fresh enough.')
return return
LOGGER.debug(msg=f'Checking {BLACKLIST_URL}')
request = Request(
url=BLACKLIST_URL, headers=self._get_headers(force_update=force))
try: try:
response = urlopen(url=request) # type: HTTPResponse self._download(
# New data available headers=self._get_headers(force_update=force),
self._write_new_file(response=response) blacklist_path=BLACKLIST_FILEPATH_TMP,
etag_path=ETAG_FILEPATH_TMP)
except HTTPError as exc: except HTTPError as exc:
if exc.code == 304: if exc.code == 304:
# Not modified, update date on the tmp file # Not modified, update date on the tmp file
@ -128,6 +110,8 @@ class BlacklistUpdater(object):
self, force: bool = False, callback: Optional[Callable] = None): self, force: bool = False, callback: Optional[Callable] = None):
'Start optionally updating the blacklist.txt file.' 'Start optionally updating the blacklist.txt file.'
# Locking to avoid multi-process update on multi-process startup # Locking to avoid multi-process update on multi-process startup
# Import filelock locally because this module is also used by setup.py
from filelock import FileLock
with FileLock(lock_file=LOCK_PATH): with FileLock(lock_file=LOCK_PATH):
self._process(force=force) self._process(force=force)
# Always execute callback because multiple processes can have # Always execute callback because multiple processes can have
@ -144,8 +128,6 @@ def update_builtin_blacklist(
Update and reload the built-in blacklist. Return the `Thread` used Update and reload the built-in blacklist. Return the `Thread` used
to do the background update, so it can be `join()`-ed. to do the background update, so it can be `join()`-ed.
""" """
if is_setuptime():
return
LOGGER.info(msg='Starting optional update of built-in blacklist.') LOGGER.info(msg='Starting optional update of built-in blacklist.')
blacklist_updater = BlacklistUpdater() blacklist_updater = BlacklistUpdater()
kwargs = dict(force=force, callback=callback) kwargs = dict(force=force, callback=callback)

View File

@ -1,20 +0,0 @@
try:
from sys import _getframe
getframe = _getframe
except ImportError:
getframe = None
from traceback import walk_stack
def is_setuptime() -> bool:
'Return `True` if called from setup.'
if getframe is None:
# This is not CPython, can't know if this is setup time
return False
for frame, lineno in walk_stack(f=getframe()):
# @See setup.py
if frame.f_locals and \
frame.f_locals.get('_IS_VALIDATEEMAIL_SETUP') is True:
return True
return False