Merge pull request #14 from reinhard-mueller/clean-setup

Clean up initial blacklist download on install
This commit is contained in:
László Károlyi 2020-04-14 12:24:30 +02:00 committed by GitHub
commit 79e59ee4b7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 76 additions and 153 deletions

View File

@ -12,12 +12,11 @@ python:
install:
- python -m pip install -U pip wheel setuptools
- python -m pip install -U isort flake8
- python -m pip install -r requirements.txt
- python -Wd setup.py sdist
- python -Wd -m pip install dist/py3-validate-email-*.tar.gz
- python -Wd setup.py sdist -v
- python -Wd -m pip install -v dist/py3-validate-email-*.tar.gz
# command to run tests
script:
- python -m isort -c --skip-glob=venv
- python -m flake8 tests/ validate_email/ setup.py
- python -m unittest discover -v
- python -m unittest discover -v -s tests

View File

@ -2,7 +2,6 @@ include AUTHORS
include LICENSE
include README.rst
include CHANGELOG.txt
recursive-include validate_email *
recursive-exclude tests *
recursive-exclude * __pycache__
recursive-exclude * *.pyc

View File

@ -1,3 +0,0 @@
dnspython==1.16.0
idna==2.8
filelock>=3.0.12

108
setup.py
View File

@ -1,106 +1,72 @@
import sys
from distutils import log
from pathlib import Path
from shutil import move, rmtree
from subprocess import check_call
from tempfile import mkdtemp
from setuptools import find_packages, setup
from setuptools.command.build_py import build_py
from setuptools.command.develop import develop
from setuptools.command.sdist import sdist
try:
# OSX Homebrew fix: https://stackoverflow.com/a/53190037/1067833
from sys import _base_executable as executable
except ImportError:
from sys import executable
_EGG_REQ_PATH = Path(__file__).parent.joinpath(
'py3_validate_email.egg-info', 'requires.txt')
_REQ_PATH = Path(__file__).parent.joinpath('requirements.txt')
with open(_REQ_PATH if _REQ_PATH.exists() else _EGG_REQ_PATH) as fd:
_req_content = fd.readlines()
_DEPENDENCIES = [x.strip() for x in _req_content if x.strip()]
with open(Path(__file__).parent.joinpath('README.rst')) as fd:
_LONG_DESC = fd.read()
def run_initial_updater():
def run_initial_updater(path: Path):
'Download an initial blacklist.txt at install time.'
# Install dependencies so the initial update can run
check_call([executable, '-m', 'pip', 'install'] + _DEPENDENCIES)
# The updater will walk code stack frames and see if this
# variable is set in locals() to determine if it is run from the
# setup, in which case it won't autoupdate.
_IS_VALIDATEEMAIL_SETUP = True
from validate_email.updater import BlacklistUpdater, LIB_PATH_DEFAULT
LIB_PATH_DEFAULT.mkdir(exist_ok=True)
blacklist_updater = BlacklistUpdater()
blacklist_updater._is_install_time = _IS_VALIDATEEMAIL_SETUP
blacklist_updater.process(force=True)
# Only import the updater module to avoid requiring all the dependencies
# and auto-running the updater.
sys.path.append(str(path.joinpath('validate_email')))
orig_dont_write_bytecode = sys.dont_write_bytecode
sys.dont_write_bytecode = True
try:
from updater import BLACKLIST_FILEPATH_INSTALLED, BlacklistUpdater
log.info(f'downloading blacklist to {BLACKLIST_FILEPATH_INSTALLED}')
BlacklistUpdater()._install()
finally:
sys.path = sys.path[:-1]
sys.dont_write_bytecode = orig_dont_write_bytecode
class DevelopCommand(develop):
'Develop command.'
"""
Adapted version of the 'develop' command.
After finishing the usual build run, download the blacklist and
store it into the source directory, because that is from where the
library will run in a developer install.
"""
def run(self):
if self.dry_run:
return super().run()
run_initial_updater()
super().run()
class SdistCommand(sdist):
'Sdist command.'
def run(self):
"""
Build the source distribution without the data directory.

The `validate_email/data` directory is moved out of the source
tree before the standard `sdist` run and moved back afterwards,
so the distribution package is created without it. Every install
creates the data directory for itself when installing the built
Python wheel.
`MANIFEST.in` should not remove the data dir since install and
develop/install would exclude it!
"""
# A dry run is delegated straight to the stock command.
if self.dry_run:
return super().run()
# Park the data directory inside a fresh temporary directory.
tempdir = Path(mkdtemp()).joinpath('data')
data_dir = Path(
__file__).absolute().parent.joinpath('validate_email', 'data')
# Only move (and later restore) the directory when it actually exists.
do_move = data_dir.exists()
if do_move:
move(src=data_dir, dst=tempdir)
super().run()
# Put the data directory back and remove the directory from mkdtemp().
if do_move:
move(src=tempdir, dst=data_dir)
rmtree(path=tempdir.parent)
if not self.dry_run:
run_initial_updater(Path(__file__).parent)
class BuildPyCommand(build_py):
'BuildPy command.'
"""
Adapted version of the 'build_py' command.
After finishing the usual build run, download the blacklist and
store it into the build directory. A subsequent 'install' step will
copy the full contents of the build directory to the install
target, thus including the blacklist.
"""
def run(self):
if self.dry_run:
return super().run()
run_initial_updater()
super().run()
if not self.dry_run:
run_initial_updater(Path(self.build_lib))
setup(
name='py3-validate-email',
version='0.2.6',
packages=find_packages(exclude=['tests']),
install_requires=_DEPENDENCIES,
install_requires=['dnspython~=1.16', 'idna~=2.8', 'filelock~=3.0'],
author='László Károlyi',
author_email='laszlo@karolyi.hu',
include_package_data=True,
description=(
'Email validator with regex, blacklisted domains and SMTP checking.'),
long_description=_LONG_DESC,
long_description=Path(__file__).parent.joinpath('README.rst').read_text(),
long_description_content_type='text/x-rst',
keywords='email validation verification mx verify',
url='http://github.com/karolyi/py3-validate-email',
cmdclass=dict(
develop=DevelopCommand, sdist=SdistCommand, build_py=BuildPyCommand),
cmdclass=dict(build_py=BuildPyCommand, develop=DevelopCommand),
license='LGPL',
)

View File

@ -9,10 +9,6 @@ from typing import Callable, Optional
from urllib.error import HTTPError
from urllib.request import Request, urlopen
from filelock import FileLock
from .utils import is_setuptime
LOGGER = getLogger(__name__)
TMP_PATH = Path(gettempdir()).joinpath(
f'{gettempprefix()}-py3-validate-email-{geteuid()}')
@ -37,57 +33,30 @@ class BlacklistUpdater(object):
"""
_refresh_when_older_than: int = 5 * 24 * 60 * 60 # 5 days
_is_install_time: bool = False
@property
def _etag_filepath(self) -> str:
'Return the ETag file path to use.'
# Install-time runs use the installed path, every other run the tmp path.
# NOTE(review): annotated as `str`, but other code calls `.read_text()`
# on these constants, which suggests `Path` objects - confirm.
return ETAG_FILEPATH_INSTALLED \
if self._is_install_time else ETAG_FILEPATH_TMP
@property
def _blacklist_filepath(self) -> str:
'Return the blacklist file path to use.'
# Install-time runs use the installed path, every other run the tmp path.
# NOTE(review): annotated as `str`, but other code calls `.stat()` on
# these constants, which suggests `Path` objects - confirm.
return BLACKLIST_FILEPATH_INSTALLED \
if self._is_install_time else BLACKLIST_FILEPATH_TMP
def _read_etag(self) -> Optional[str]:
'Read the etag header from the stored etag file when exists.'
for path in [ETAG_FILEPATH_TMP, ETAG_FILEPATH_INSTALLED]:
try:
with open(path) as fd:
return fd.read().strip()
return path.read_text().strip()
except FileNotFoundError:
pass
def _write_etag(self, content: str):
'Write the etag of the newly received file to the cache.'
# The target file is chosen by the `_etag_filepath` property.
path = self._etag_filepath
LOGGER.debug(msg=f'Storing ETag response into {path}.')
# Mode 'w' replaces any previously stored ETag.
with open(path, 'w') as fd:
fd.write(content)
@property
def _is_old(self) -> bool:
'Return `True` if the locally stored file is old.'
true_when_older_than = time() - self._refresh_when_older_than
try:
ctime = BLACKLIST_FILEPATH_TMP.stat().st_ctime
if ctime >= true_when_older_than:
# Downloaded tmp file is still fresh enough
return False
except FileNotFoundError:
pass
try:
ctime = BLACKLIST_FILEPATH_INSTALLED.stat().st_ctime
except FileNotFoundError:
return True
return ctime < true_when_older_than
for path in [BLACKLIST_FILEPATH_TMP, BLACKLIST_FILEPATH_INSTALLED]:
try:
return path.stat().st_ctime < true_when_older_than
except FileNotFoundError:
pass
return True # no file found at all
def _get_headers(self, force_update: bool = False) -> dict:
'Compile a header with etag if available.'
headers = dict()
if force_update or self._is_install_time:
if force_update:
return headers
etag = self._read_etag()
if not etag:
@ -95,27 +64,40 @@ class BlacklistUpdater(object):
headers['If-None-Match'] = etag
return headers
def _write_new_file(self, response: HTTPResponse):
'Write new data file on its arrival.'
def _download(self, headers: dict, blacklist_path: Path, etag_path: Path):
'Download and store blacklist file.'
LOGGER.debug(msg=f'Checking {BLACKLIST_URL}')
request = Request(url=BLACKLIST_URL, headers=headers)
response = urlopen(url=request) # type: HTTPResponse
# New data available
LOGGER.debug(msg=f'Writing response into {blacklist_path}')
blacklist_path.write_bytes(response.fp.read())
if 'ETag' in response.headers:
self._write_etag(response.headers.get('ETag'))
path = self._blacklist_filepath
LOGGER.debug(msg=f'Writing response into {path}')
with open(path, 'wb') as fd:
fd.write(response.fp.read())
LOGGER.debug(msg=f'Storing ETag response into {etag_path}.')
etag_path.write_text(response.headers['ETag'])
def _install(self):
"""
Download and store the blacklist file and the matching etag file
into the library path. This is executed from setup.py upon
installation of the library. Don't call this in your
application.
"""
# Make sure the target directory exists; an existing one is accepted.
LIB_PATH_DEFAULT.mkdir(exist_ok=True)
# Empty headers: no stored ETag is sent along with this request.
self._download(
headers={}, blacklist_path=BLACKLIST_FILEPATH_INSTALLED,
etag_path=ETAG_FILEPATH_INSTALLED)
def _process(self, force: bool = False):
'Start optionally updating the blacklist.txt file, while locked.'
if not force and not self._is_old:
LOGGER.debug(msg='Not updating because file is fresh enough.')
return
LOGGER.debug(msg=f'Checking {BLACKLIST_URL}')
request = Request(
url=BLACKLIST_URL, headers=self._get_headers(force_update=force))
try:
response = urlopen(url=request) # type: HTTPResponse
# New data available
self._write_new_file(response=response)
self._download(
headers=self._get_headers(force_update=force),
blacklist_path=BLACKLIST_FILEPATH_TMP,
etag_path=ETAG_FILEPATH_TMP)
except HTTPError as exc:
if exc.code == 304:
# Not modified, update date on the tmp file
@ -128,6 +110,8 @@ class BlacklistUpdater(object):
self, force: bool = False, callback: Optional[Callable] = None):
'Start optionally updating the blacklist.txt file.'
# Locking to avoid multi-process update on multi-process startup
# Import filelock locally because this module is also used by setup.py
from filelock import FileLock
with FileLock(lock_file=LOCK_PATH):
self._process(force=force)
# Always execute callback because multiple processes can have
@ -144,8 +128,6 @@ def update_builtin_blacklist(
Update and reload the built-in blacklist. Return the `Thread` used
to do the background update, so it can be `join()`-ed.
"""
if is_setuptime():
return
LOGGER.info(msg='Starting optional update of built-in blacklist.')
blacklist_updater = BlacklistUpdater()
kwargs = dict(force=force, callback=callback)

View File

@ -1,20 +0,0 @@
try:
from sys import _getframe
getframe = _getframe
except ImportError:
getframe = None
from traceback import walk_stack
def is_setuptime() -> bool:
"""
Return `True` if called from setup.

The call stack is walked starting at the current frame; the result
is `True` as soon as a frame has a local variable named
`_IS_VALIDATEEMAIL_SETUP` whose value is exactly `True`.
"""
if getframe is None:
# `sys._getframe` could not be imported, so this is not CPython and
# the stack cannot be inspected: can't know if this is setup time
return False
for frame, lineno in walk_stack(f=getframe()):
# The marker is set as a local variable in setup.py (see there).
# Frames without any locals are skipped by the first condition.
if frame.f_locals and \
frame.f_locals.get('_IS_VALIDATEEMAIL_SETUP') is True:
return True
# No frame on the stack carried the marker.
return False