diff --git a/.travis.yml b/.travis.yml index efc20d3..10ee333 100644 --- a/.travis.yml +++ b/.travis.yml @@ -12,12 +12,11 @@ python: install: - python -m pip install -U pip wheel setuptools - python -m pip install -U isort flake8 - - python -m pip install -r requirements.txt - - python -Wd setup.py sdist - - python -Wd -m pip install dist/py3-validate-email-*.tar.gz + - python -Wd setup.py sdist -v + - python -Wd -m pip install -v dist/py3-validate-email-*.tar.gz # command to run tests script: - python -m isort -c --skip-glob=venv - python -m flake8 tests/ validate_email/ setup.py - - python -m unittest discover -v + - python -m unittest discover -v -s tests diff --git a/MANIFEST.in b/MANIFEST.in index 6be948f..e6c5277 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -2,7 +2,6 @@ include AUTHORS include LICENSE include README.rst include CHANGELOG.txt -recursive-include validate_email * recursive-exclude tests * recursive-exclude * __pycache__ recursive-exclude * *.pyc diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 0399508..0000000 --- a/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -dnspython==1.16.0 -idna==2.8 -filelock>=3.0.12 diff --git a/setup.py b/setup.py index 1bed398..c9180ee 100644 --- a/setup.py +++ b/setup.py @@ -1,106 +1,72 @@ +import sys +from distutils import log from pathlib import Path -from shutil import move, rmtree -from subprocess import check_call -from tempfile import mkdtemp from setuptools import find_packages, setup from setuptools.command.build_py import build_py from setuptools.command.develop import develop -from setuptools.command.sdist import sdist - -try: - # OSX Homebrew fix: https://stackoverflow.com/a/53190037/1067833 - from sys import _base_executable as executable -except ImportError: - from sys import executable - -_EGG_REQ_PATH = Path(__file__).parent.joinpath( - 'py3_validate_email.egg-info', 'requires.txt') -_REQ_PATH = Path(__file__).parent.joinpath('requirements.txt') - -with open(_REQ_PATH 
if _REQ_PATH.exists() else _EGG_REQ_PATH) as fd: - _req_content = fd.readlines() -_DEPENDENCIES = [x.strip() for x in _req_content if x.strip()] - -with open(Path(__file__).parent.joinpath('README.rst')) as fd: - _LONG_DESC = fd.read() -def run_initial_updater(): +def run_initial_updater(path: Path): 'Download an initial blacklist.txt on install time.' - # Install dependencies so the initial update can run - check_call([executable, '-m', 'pip', 'install'] + _DEPENDENCIES) - # The updater will walk code stack frames and see if this - # variable is set in locals() to determine if it is run from the - # setup, in which case it won't autoupdate. - _IS_VALIDATEEMAIL_SETUP = True - from validate_email.updater import BlacklistUpdater, LIB_PATH_DEFAULT - LIB_PATH_DEFAULT.mkdir(exist_ok=True) - blacklist_updater = BlacklistUpdater() - blacklist_updater._is_install_time = _IS_VALIDATEEMAIL_SETUP - blacklist_updater.process(force=True) + # Only import the updater module to avoid requiring all the dependencies + # and auto-running the updater. + sys.path.append(str(path.joinpath('validate_email'))) + orig_dont_write_bytecode = sys.dont_write_bytecode + sys.dont_write_bytecode = True + try: + from updater import BLACKLIST_FILEPATH_INSTALLED, BlacklistUpdater + log.info(f'downloading blacklist to {BLACKLIST_FILEPATH_INSTALLED}') + BlacklistUpdater()._install() + finally: + sys.path = sys.path[:-1] + sys.dont_write_bytecode = orig_dont_write_bytecode class DevelopCommand(develop): - 'Develop command.' + """ + Adapted version of the 'develop' command. + + After finishing the usual build run, download the blacklist and + store it into the source directory, because that is from where the + library will run in a developer install. + """ def run(self): - if self.dry_run: - return super().run() - run_initial_updater() super().run() - - -class SdistCommand(sdist): - 'Sdist command.' 
- - def run(self): - """ - Manually remove the data directory before creating the - distribution package, every install will create it for - themselves when installing created the python wheel. - `MANIFEST.in` should not remove the data dir since install and - develop/install would exclude it! - """ - if self.dry_run: - return super().run() - tempdir = Path(mkdtemp()).joinpath('data') - data_dir = Path( - __file__).absolute().parent.joinpath('validate_email', 'data') - do_move = data_dir.exists() - if do_move: - move(src=data_dir, dst=tempdir) - super().run() - if do_move: - move(src=tempdir, dst=data_dir) - rmtree(path=tempdir.parent) + if not self.dry_run: + run_initial_updater(Path(__file__).parent) class BuildPyCommand(build_py): - 'BuildPy command.' + """ + Adapted version of the 'build_py' command. + + After finishing the usual build run, download the blacklist and + store it into the build directory. A subsequent 'install' step will + copy the full contents of the build directory to the install + target, thus including the blacklist. 
+ """ def run(self): - if self.dry_run: - return super().run() - run_initial_updater() super().run() + if not self.dry_run: + run_initial_updater(Path(self.build_lib)) setup( name='py3-validate-email', version='0.2.6', packages=find_packages(exclude=['tests']), - install_requires=_DEPENDENCIES, + install_requires=['dnspython~=1.16', 'idna~=2.8', 'filelock~=3.0'], author='László Károlyi', author_email='laszlo@karolyi.hu', - include_package_data=True, description=( 'Email validator with regex, blacklisted domains and SMTP checking.'), - long_description=_LONG_DESC, + long_description=Path(__file__).parent.joinpath('README.rst').read_text(), long_description_content_type='text/x-rst', keywords='email validation verification mx verify', url='http://github.com/karolyi/py3-validate-email', - cmdclass=dict( - develop=DevelopCommand, sdist=SdistCommand, build_py=BuildPyCommand), + cmdclass=dict(build_py=BuildPyCommand, develop=DevelopCommand), license='LGPL', ) diff --git a/validate_email/updater.py b/validate_email/updater.py index c561200..610f60b 100644 --- a/validate_email/updater.py +++ b/validate_email/updater.py @@ -9,10 +9,6 @@ from typing import Callable, Optional from urllib.error import HTTPError from urllib.request import Request, urlopen -from filelock import FileLock - -from .utils import is_setuptime - LOGGER = getLogger(__name__) TMP_PATH = Path(gettempdir()).joinpath( f'{gettempprefix()}-py3-validate-email-{geteuid()}') @@ -37,57 +33,30 @@ class BlacklistUpdater(object): """ _refresh_when_older_than: int = 5 * 24 * 60 * 60 # 5 days - _is_install_time: bool = False - - @property - def _etag_filepath(self) -> str: - 'Return the ETag file path to use.' - return ETAG_FILEPATH_INSTALLED \ - if self._is_install_time else ETAG_FILEPATH_TMP - - @property - def _blacklist_filepath(self) -> str: - 'Return the blacklist file path to use.' 
- return BLACKLIST_FILEPATH_INSTALLED \ - if self._is_install_time else BLACKLIST_FILEPATH_TMP def _read_etag(self) -> Optional[str]: 'Read the etag header from the stored etag file when exists.' for path in [ETAG_FILEPATH_TMP, ETAG_FILEPATH_INSTALLED]: try: - with open(path) as fd: - return fd.read().strip() + return path.read_text().strip() except FileNotFoundError: pass - def _write_etag(self, content: str): - 'Write the etag of the newly received file to the cache.' - path = self._etag_filepath - LOGGER.debug(msg=f'Storing ETag response into {path}.') - with open(path, 'w') as fd: - fd.write(content) - @property def _is_old(self) -> bool: 'Return `True` if the locally stored file is old.' true_when_older_than = time() - self._refresh_when_older_than - try: - ctime = BLACKLIST_FILEPATH_TMP.stat().st_ctime - if ctime >= true_when_older_than: - # Downloaded tmp file is still fresh enough - return False - except FileNotFoundError: - pass - try: - ctime = BLACKLIST_FILEPATH_INSTALLED.stat().st_ctime - except FileNotFoundError: - return True - return ctime < true_when_older_than + for path in [BLACKLIST_FILEPATH_TMP, BLACKLIST_FILEPATH_INSTALLED]: + try: + return path.stat().st_ctime < true_when_older_than + except FileNotFoundError: + pass + return True # no file found at all def _get_headers(self, force_update: bool = False) -> dict: 'Compile a header with etag if available.' headers = dict() - if force_update or self._is_install_time: + if force_update: return headers etag = self._read_etag() if not etag: @@ -95,27 +64,40 @@ class BlacklistUpdater(object): headers['If-None-Match'] = etag return headers - def _write_new_file(self, response: HTTPResponse): - 'Write new data file on its arrival.' + def _download(self, headers: dict, blacklist_path: Path, etag_path: Path): + 'Download and store blacklist file.' 
+ LOGGER.debug(msg=f'Checking {BLACKLIST_URL}') + request = Request(url=BLACKLIST_URL, headers=headers) + response = urlopen(url=request) # type: HTTPResponse + # New data available + LOGGER.debug(msg=f'Writing response into {blacklist_path}') + blacklist_path.write_bytes(response.fp.read()) if 'ETag' in response.headers: - self._write_etag(response.headers.get('ETag')) - path = self._blacklist_filepath - LOGGER.debug(msg=f'Writing response into {path}') - with open(path, 'wb') as fd: - fd.write(response.fp.read()) + LOGGER.debug(msg=f'Storing ETag response into {etag_path}.') + etag_path.write_text(response.headers['ETag']) + + def _install(self): + """ + Download and store the blacklist file and the matching etag file + into the library path. This is executed from setup.py upon + installation of the library. Don't call this in your + application. + """ + LIB_PATH_DEFAULT.mkdir(exist_ok=True) + self._download( + headers={}, blacklist_path=BLACKLIST_FILEPATH_INSTALLED, + etag_path=ETAG_FILEPATH_INSTALLED) def _process(self, force: bool = False): 'Start optionally updating the blacklist.txt file, while locked.' if not force and not self._is_old: LOGGER.debug(msg='Not updating because file is fresh enough.') return - LOGGER.debug(msg=f'Checking {BLACKLIST_URL}') - request = Request( - url=BLACKLIST_URL, headers=self._get_headers(force_update=force)) try: - response = urlopen(url=request) # type: HTTPResponse - # New data available - self._write_new_file(response=response) + self._download( + headers=self._get_headers(force_update=force), + blacklist_path=BLACKLIST_FILEPATH_TMP, + etag_path=ETAG_FILEPATH_TMP) except HTTPError as exc: if exc.code == 304: # Not modified, update date on the tmp file @@ -128,6 +110,8 @@ class BlacklistUpdater(object): self, force: bool = False, callback: Optional[Callable] = None): 'Start optionally updating the blacklist.txt file.' 
# Locking to avoid multi-process update on multi-process startup + # Import filelock locally because this module is also used by setup.py + from filelock import FileLock with FileLock(lock_file=LOCK_PATH): self._process(force=force) # Always execute callback because multiple processes can have @@ -144,8 +128,6 @@ def update_builtin_blacklist( Update and reload the built-in blacklist. Return the `Thread` used to do the background update, so it can be `join()`-ed. """ - if is_setuptime(): - return LOGGER.info(msg='Starting optional update of built-in blacklist.') blacklist_updater = BlacklistUpdater() kwargs = dict(force=force, callback=callback) diff --git a/validate_email/utils.py b/validate_email/utils.py deleted file mode 100644 index a32372e..0000000 --- a/validate_email/utils.py +++ /dev/null @@ -1,20 +0,0 @@ -try: - from sys import _getframe - getframe = _getframe -except ImportError: - getframe = None - -from traceback import walk_stack - - -def is_setuptime() -> bool: - 'Return `True` if called from setup.' - if getframe is None: - # This is not CPython, can't know if this is setup time - return False - for frame, lineno in walk_stack(f=getframe()): - # @See setup.py - if frame.f_locals and \ - frame.f_locals.get('_IS_VALIDATEEMAIL_SETUP') is True: - return True - return False