Separate blacklist check

This commit is contained in:
László Károlyi 2019-11-21 15:41:05 +01:00
parent fee6445271
commit 42dd94ee18
Signed by: karolyi
GPG Key ID: 2DCAF25E55735BFE
8 changed files with 122 additions and 44 deletions

1
.gitignore vendored
View File

@ -1,4 +1,5 @@
*.pyc
validate_email/lib
build
dist
MANIFEST

View File

@ -1,3 +1,6 @@
0.1.12:
- Blacklist/whitelist domain checking is now independent of regex checking.
0.1.11:
- Handling IDNA errors

View File

@ -1,8 +1,2 @@
dnspython==1.16.0
entrypoints==0.3
flake8==3.7.7
idna==2.8
isort==4.3.21
mccabe==0.6.1
pycodestyle==2.5.0
pyflakes==2.1.1

View File

@ -4,7 +4,7 @@ from urllib.request import urlopen
from setuptools import find_packages, setup
from setuptools.command.build_py import build_py
blacklist_url = (
BLACKLIST_URL = (
'https://raw.githubusercontent.com/martenson/disposable-email-domains/'
'master/disposable_email_blocklist.conf')
@ -15,10 +15,10 @@ class PostBuildPyCommand(build_py):
def run(self):
if self.dry_run:
return super().run()
with urlopen(url=blacklist_url) as fd:
with urlopen(url=BLACKLIST_URL) as fd:
content = fd.read().decode('utf-8')
target_dir = join(self.build_lib, 'validate_email/lib')
self.mkpath(target_dir)
self.mkpath(name=target_dir)
with open(join(target_dir, 'blacklist.txt'), 'w') as fd:
fd.write(content)
super().run()
@ -26,7 +26,7 @@ class PostBuildPyCommand(build_py):
setup(
name='py3-validate-email',
version='0.1.11',
version='0.1.12',
packages=find_packages(exclude=['tests']),
install_requires=['dnspython>=1.16.0', 'idna>=2.8'],
author='László Károlyi',

View File

@ -0,0 +1,59 @@
from os import makedirs
from os.path import dirname, join
from unittest.case import TestCase
from urllib.request import urlopen
from validate_email import validate_email
from validate_email.domainlist_check import domainlist_check
# URL of the disposable-domain blocklist that DlBlacklist.run() below
# downloads and stores as 'blacklist.txt'.
BLACKLIST_URL = (
'https://raw.githubusercontent.com/martenson/disposable-email-domains/'
'master/disposable_email_blocklist.conf')
class DlBlacklist(object):
    """Stand-in for the post-build command that downloads the blacklist."""

    def __init__(self):
        # Imported locally: this resolves to the `domainlist_check` module,
        # whose file location tells us where the package lives on disk.
        from validate_email import domainlist_check as checker_module
        package_dir = dirname(checker_module.__file__)
        # The directory that contains the `validate_email` package.
        self.build_lib = dirname(package_dir)

    def mkpath(self, name: str):
        """Create the directory `name`; an existing directory is not an error."""
        makedirs(name=name, exist_ok=True)

    def run(self):
        """Download the blacklist and write it under `validate_email/lib`."""
        with urlopen(url=BLACKLIST_URL) as response:
            raw = response.read()
        content = raw.decode('utf-8')
        target_dir = join(self.build_lib, 'validate_email/lib')
        self.mkpath(name=target_dir)
        destination = join(target_dir, 'blacklist.txt')
        with open(destination, 'w') as out:
            out.write(content)
class BlacklistCheckTestCase(TestCase):
    """Testing if the included blacklist filtering works."""

    def test_blacklist_positive(self):
        """Disallows blacklist item: mailinator.com."""
        dl = DlBlacklist()
        dl.run()
        # The `domainlist_check` singleton was created when its module was
        # imported, i.e. before the download above. If 'blacklist.txt' did
        # not exist at that point, its blacklist is still empty, so reload
        # it now that the file is guaranteed to be on disk.
        domainlist_check._load_builtin_blacklist()
        self.assertFalse(expr=domainlist_check(
            email_address='pa2@mailinator.com'))
        self.assertFalse(expr=validate_email(
            email_address='pa2@mailinator.com', check_regex=False,
            use_blacklist=True))
        self.assertFalse(expr=validate_email(
            email_address='pa2@mailinator.com', check_regex=True,
            use_blacklist=True))

    def test_blacklist_negative(self):
        """Allows a domain not in the blacklist."""
        self.assertTrue(expr=domainlist_check(
            email_address='pa2@some-random-domain-thats-not-blacklisted.com'))

    def test_erroneous_email(self):
        """Will reject emails in erroneous format."""
        # No '@' in the address, so the domain list check rejects it.
        self.assertFalse(expr=domainlist_check(
            email_address='pa2-mailinator.com'))

View File

@ -0,0 +1,46 @@
from os.path import dirname, join
from typing import Optional
SetOrNone = Optional[set]


class DomainListValidator(object):
    """
    Check the domain part of an email address against domain lists.

    All list entries are stored lower-cased, and the domain of the checked
    address is lower-cased before lookup, so matching is case-insensitive.
    """

    # Class-level defaults, used when no list is passed in or loaded.
    domain_whitelist = frozenset()
    domain_blacklist = frozenset()

    def __init__(
            self, whitelist: SetOrNone = None, blacklist: SetOrNone = None):
        """
        Set up the lists to check against.

        :param whitelist: domains that always pass; optional.
        :param blacklist: domains that always fail; when empty or omitted,
            the built-in blacklist file is loaded instead (if it exists).
        """
        if whitelist:
            self.domain_whitelist = {x.lower() for x in whitelist}
        if blacklist:
            self.domain_blacklist = {x.lower() for x in blacklist}
        else:
            self._load_builtin_blacklist()

    def _load_builtin_blacklist(self):
        """
        Load 'lib/blacklist.txt' located next to this module.

        A missing file is not an error: the current blacklist is then left
        untouched.
        """
        path = join(dirname(__file__), 'lib', 'blacklist.txt')
        try:
            with open(path) as fd:
                lines = fd.readlines()
        except FileNotFoundError:
            return
        # Skip blank lines; normalise the rest the same way as __init__ does.
        self.domain_blacklist = {
            x.strip().lower() for x in lines if x.strip()}

    def __call__(self, email_address: str) -> bool:
        """
        Return `False` for a malformed address or a blacklisted domain.

        The whitelist takes precedence over the blacklist. An address
        without an '@' (or an empty one) is rejected.
        """
        if not email_address or '@' not in email_address:
            return False
        # The stored lists are lower-cased, so the looked-up domain must be
        # lower-cased as well; otherwise 'user@MAILINATOR.COM' would slip
        # through the blacklist.
        domain_part = email_address.rsplit('@', 1)[1].lower()
        if domain_part in self.domain_whitelist:
            return True
        if domain_part in self.domain_blacklist:
            return False
        return True


domainlist_check = DomainListValidator()

View File

@ -1,5 +1,4 @@
from ipaddress import IPv4Address, IPv6Address
from os.path import dirname, join
from typing import Optional
from .constants import HOST_REGEX, LITERAL_REGEX, USER_REGEX
@ -32,44 +31,18 @@ def _validate_ipv46_address(value: str) -> bool:
return _validate_ipv6_address(value)
class EmailValidator(object):
class RegexValidator(object):
'Slightly adjusted email regex checker from the Django project.'
domain_whitelist = frozenset('localhost')
domain_blacklist = frozenset()
def __init__(
self, whitelist: SetOrNone = None, blacklist: SetOrNone = None):
self.domain_whitelist = set(whitelist) \
if whitelist else self.domain_whitelist
self._load_blacklist(blacklist=blacklist)
def _load_blacklist(self, blacklist: SetOrNone = None):
'Load our blacklist.'
self.domain_blacklist = set(blacklist) \
if blacklist else self.domain_blacklist
path = join(dirname(__file__), 'lib', 'blacklist.txt')
try:
with open(path) as fd:
lines = fd.readlines()
except FileNotFoundError:
return
self.domain_blacklist = self.domain_blacklist.union(
x.strip() for x in lines)
def __call__(self, value: str, use_blacklist: bool = True) -> bool:
if not value or '@' not in value:
def __call__(self, email_address: str, use_blacklist: bool = True) -> bool:
if not email_address or '@' not in email_address:
return False
user_part, domain_part = value.rsplit('@', 1)
user_part, domain_part = email_address.rsplit('@', 1)
if not USER_REGEX.match(user_part):
return False
if domain_part in self.domain_whitelist:
return True
if domain_part in self.domain_blacklist:
return False
if not self.validate_domain_part(domain_part):
# Try for possible IDN domain-part
try:
@ -93,4 +66,4 @@ class EmailValidator(object):
return False
regex_check = EmailValidator()
regex_check = RegexValidator()

View File

@ -1,5 +1,6 @@
from typing import Optional
from .domainlist_check import domainlist_check
from .mx_check import mx_check
from .regex_check import regex_check
@ -15,8 +16,9 @@ def validate_email(
Return `None` if the result is ambiguous.
"""
if check_regex and not regex_check(
value=email_address, use_blacklist=use_blacklist):
if check_regex and not regex_check(email_address=email_address):
return False
if use_blacklist and not domainlist_check(email_address=email_address):
return False
if not check_mx:
return True