Added test cases for regex
This commit is contained in:
parent
eaf2668a84
commit
fe914d6f57
|
@ -0,0 +1,65 @@
|
|||
# All we are really doing is comparing the input string to one
|
||||
# gigantic regular expression. But building that regexp, and
|
||||
# ensuring its correctness, is made much easier by assembling it
|
||||
# from the "tokens" defined by the RFC. Each of these tokens is
|
||||
# tested in the accompanying unit test file.
|
||||
#
|
||||
# The section of RFC 2822 from which each pattern component is
|
||||
# derived is given in an accompanying comment.
|
||||
#
|
||||
# (To make things simple, every string below is given as 'raw',
|
||||
# even when it's not strictly necessary. This way we don't forget
|
||||
# when it is necessary.)
|
||||
#
|
||||
|
||||
|
||||
import re
|
||||
|
||||
|
||||
# Building blocks of the RFC 2822 addr-spec pattern. Each constant is a regex
# fragment (not a compiled pattern); they are concatenated bottom-up into
# VALID_ADDRESS_REGEXP.
#
# NOTE(review): WSP uses `\s`, which is broader than the RFC's WSP
# (SP / HTAB). Kept as-is so existing callers see no change in what is accepted.
WSP = r'[\s]'                                    # see 2.2.2. Structured Header Field Bodies
CRLF = r'(?:\r\n)'                               # see 2.2.3. Long Header Fields
# RFC 2822 3.2.1: NO-WS-CTL = %d1-8 / %d11 / %d12 / %d14-31 / %d127.
# The last control range therefore starts at \x0e (decimal 14), not \x0f.
NO_WS_CTL = r'\x01-\x08\x0b\x0c\x0e-\x1f\x7f'    # see 3.2.1. Primitive Tokens
QUOTED_PAIR = r'(?:\\.)'                         # see 3.2.2. Quoted characters
FWS = r'(?:(?:' + WSP + r'*' + CRLF + r')?' + \
      WSP + r'+)'                                # see 3.2.3. Folding white space and comments
CTEXT = r'[' + NO_WS_CTL + \
        r'\x21-\x27\x2a-\x5b\x5d-\x7e]'          # see 3.2.3
CCONTENT = r'(?:' + CTEXT + r'|' + \
           QUOTED_PAIR + r')'                    # see 3.2.3 (NB: The RFC includes COMMENT here
                                                 # as well, but that would be circular.)
COMMENT = r'\((?:' + FWS + r'?' + CCONTENT + \
          r')*' + FWS + r'?\)'                   # see 3.2.3
CFWS = r'(?:' + FWS + r'?' + COMMENT + ')*(?:' + \
       FWS + '?' + COMMENT + '|' + FWS + ')'     # see 3.2.3
ATEXT = r'[\w!#$%&\'\*\+\-/=\?\^`\{\|\}~]'       # see 3.2.4. Atom
ATOM = CFWS + r'?' + ATEXT + r'+' + CFWS + r'?'  # see 3.2.4
DOT_ATOM_TEXT = ATEXT + r'+(?:\.' + ATEXT + r'+)*'     # see 3.2.4
DOT_ATOM = CFWS + r'?' + DOT_ATOM_TEXT + CFWS + r'?'   # see 3.2.4
QTEXT = r'[' + NO_WS_CTL + \
        r'\x21\x23-\x5b\x5d-\x7e]'               # see 3.2.5. Quoted strings
QCONTENT = r'(?:' + QTEXT + r'|' + \
           QUOTED_PAIR + r')'                    # see 3.2.5
QUOTED_STRING = CFWS + r'?' + r'"(?:' + FWS + \
                r'?' + QCONTENT + r')*' + FWS + \
                r'?' + r'"' + CFWS + r'?'
LOCAL_PART = r'(?:' + DOT_ATOM + r'|' + \
             QUOTED_STRING + r')'                # see 3.4.1. Addr-spec specification
DTEXT = r'[' + NO_WS_CTL + r'\x21-\x5a\x5e-\x7e]'      # see 3.4.1
DCONTENT = r'(?:' + DTEXT + r'|' + \
           QUOTED_PAIR + r')'                    # see 3.4.1
DOMAIN_LITERAL = CFWS + r'?' + r'\[' + \
                 r'(?:' + FWS + r'?' + DCONTENT + \
                 r')*' + FWS + r'?\]' + CFWS + r'?'    # see 3.4.1
DOMAIN = r'(?:' + DOT_ATOM + r'|' + \
         DOMAIN_LITERAL + r')'                   # see 3.4.1
ADDR_SPEC = LOCAL_PART + r'@' + DOMAIN           # see 3.4.1
# Anchored at both ends so the whole input must be a single addr-spec.
VALID_ADDRESS_REGEXP = '^' + ADDR_SPEC + '$'
|
||||
|
||||
|
||||
def email_has_valid_structure(email_address):
    """Report whether *email_address* matches VALID_ADDRESS_REGEXP.

    Returns True when ``re.match`` finds a match for the module-level
    pattern, and False otherwise.
    """
    return re.match(VALID_ADDRESS_REGEXP, email_address) is not None
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
from email_regex import email_has_valid_structure
|
||||
|
||||
# Addresses the structure check is expected to accept.
VALID_EMAIL_ADDRESS_EXAMPLES = [
    "email@domain.com",                # basic valid email
    "firstname.lastname@domain.com",   # dot in address field
    "email@subdomain.domain.com",      # dot in subdomain
    "firstname+lastname@domain.com",   # + in address field
    "email@123.123.123.123",           # domain address is IP address
    "email@[123.123.123.123]",         # square brackets around IP address
    '"email"@domain.com',              # quote marks in address field
    "1234567890@domain.com",           # numbers in address field
    "email@domain-one.com",            # dash in subdomain
    "_______@domain.com",              # underscore in address field
    "email@domain.name",               # .name top level domain name
    "email@domain.co.jp",              # dot in top level domain
    "firstname-lastname@domain.com",   # dash in address field
]

# Addresses the structure check is expected to reject.
# NOTE(review): some entries (e.g. "email@domain", "email@domain.web",
# "email@domain.com (Joe Smith)") look like syntactically valid RFC 2822
# addr-specs; confirm the structure regex is really meant to reject them.
INVALID_EMAIL_ADDRESS_EXAMPLES = [
    "plainaddress",                    # missing @ sign and domain
    "#@%^%#$@#$@#.com",                # garbage
    "@domain.com",                     # missing username
    "Joe Smith <email@domain.com>",    # encoded html within email is invalid
    "email.domain.com",                # missing @
    "email@domain@domain.com",         # two @ signs
    ".email@domain.com",               # leading dot in address is not allowed
    "email.@domain.com",               # trailing dot in address is not allowed
    "email..email@domain.com",         # multiple dots
    "あいうえお@domain.com",             # unicode chars as address
    "email@domain.com (Joe Smith)",    # text following the email is not allowed
    "email@domain",                    # missing top level domain (.com/.net/.org/etc)
    "email@-domain.com",               # leading dash in front of domain is invalid
    "email@domain.web",                # .web is not a valid top level domain
    "email@111.222.333.44444",         # invalid IP format
    "email@domain..com",               # multiple dots in the domain portion
]
|
||||
|
||||
|
||||
def test_valid_email_structure_regex():
    """Check that every entry of VALID_EMAIL_ADDRESS_EXAMPLES is accepted.

    Raises AssertionError naming the first address (and its list index)
    for which email_has_valid_structure does not return True.
    """
    for position, candidate in enumerate(VALID_EMAIL_ADDRESS_EXAMPLES):
        if email_has_valid_structure(candidate) is True:
            continue
        raise AssertionError(
            "{} should be valid ({}th email address in the list)".format(
                candidate, position))
|
||||
|
||||
def test_invalid_email_structure_regex():
    """Check that every entry of INVALID_EMAIL_ADDRESS_EXAMPLES is rejected.

    Raises AssertionError naming the first address (and its list index)
    for which email_has_valid_structure does not return False.
    """
    for position, candidate in enumerate(INVALID_EMAIL_ADDRESS_EXAMPLES):
        if email_has_valid_structure(candidate) is False:
            continue
        raise AssertionError(
            "{} should be invalid ({}th email address in the list)".format(
                candidate, position))
|
|
@ -17,80 +17,14 @@
|
|||
# exception of a circular definition (see comments below), and
|
||||
# with the omission of the pattern components marked as "obsolete".
|
||||
|
||||
import re
|
||||
import smtplib
|
||||
import logging
|
||||
import socket
|
||||
|
||||
try:
|
||||
raw_input
|
||||
except NameError:
|
||||
def raw_input(prompt=''):
|
||||
return input(prompt)
|
||||
class ServerError(Exception):
|
||||
pass
|
||||
|
||||
try:
|
||||
import DNS
|
||||
ServerError = DNS.ServerError
|
||||
DNS.DiscoverNameServers()
|
||||
except (ImportError, AttributeError):
|
||||
DNS = None
|
||||
|
||||
class ServerError(Exception):
|
||||
pass
|
||||
|
||||
# All we are really doing is comparing the input string to one
|
||||
# gigantic regular expression. But building that regexp, and
|
||||
# ensuring its correctness, is made much easier by assembling it
|
||||
# from the "tokens" defined by the RFC. Each of these tokens is
|
||||
# tested in the accompanying unit test file.
|
||||
#
|
||||
# The section of RFC 2822 from which each pattern component is
|
||||
# derived is given in an accompanying comment.
|
||||
#
|
||||
# (To make things simple, every string below is given as 'raw',
|
||||
# even when it's not strictly necessary. This way we don't forget
|
||||
# when it is necessary.)
|
||||
#
|
||||
# Building blocks of the RFC 2822 addr-spec pattern. Each constant is a regex
# fragment (not a compiled pattern); they are concatenated bottom-up into
# VALID_ADDRESS_REGEXP.
#
# NOTE(review): WSP uses `\s`, which is broader than the RFC's WSP
# (SP / HTAB). Kept as-is so existing callers see no change in what is accepted.
WSP = r'[\s]'                                    # see 2.2.2. Structured Header Field Bodies
CRLF = r'(?:\r\n)'                               # see 2.2.3. Long Header Fields
# RFC 2822 3.2.1: NO-WS-CTL = %d1-8 / %d11 / %d12 / %d14-31 / %d127.
# The last control range therefore starts at \x0e (decimal 14), not \x0f.
NO_WS_CTL = r'\x01-\x08\x0b\x0c\x0e-\x1f\x7f'    # see 3.2.1. Primitive Tokens
QUOTED_PAIR = r'(?:\\.)'                         # see 3.2.2. Quoted characters
FWS = r'(?:(?:' + WSP + r'*' + CRLF + r')?' + \
      WSP + r'+)'                                # see 3.2.3. Folding white space and comments
CTEXT = r'[' + NO_WS_CTL + \
        r'\x21-\x27\x2a-\x5b\x5d-\x7e]'          # see 3.2.3
CCONTENT = r'(?:' + CTEXT + r'|' + \
           QUOTED_PAIR + r')'                    # see 3.2.3 (NB: The RFC includes COMMENT here
                                                 # as well, but that would be circular.)
COMMENT = r'\((?:' + FWS + r'?' + CCONTENT + \
          r')*' + FWS + r'?\)'                   # see 3.2.3
CFWS = r'(?:' + FWS + r'?' + COMMENT + ')*(?:' + \
       FWS + '?' + COMMENT + '|' + FWS + ')'     # see 3.2.3
ATEXT = r'[\w!#$%&\'\*\+\-/=\?\^`\{\|\}~]'       # see 3.2.4. Atom
ATOM = CFWS + r'?' + ATEXT + r'+' + CFWS + r'?'  # see 3.2.4
DOT_ATOM_TEXT = ATEXT + r'+(?:\.' + ATEXT + r'+)*'     # see 3.2.4
DOT_ATOM = CFWS + r'?' + DOT_ATOM_TEXT + CFWS + r'?'   # see 3.2.4
QTEXT = r'[' + NO_WS_CTL + \
        r'\x21\x23-\x5b\x5d-\x7e]'               # see 3.2.5. Quoted strings
QCONTENT = r'(?:' + QTEXT + r'|' + \
           QUOTED_PAIR + r')'                    # see 3.2.5
QUOTED_STRING = CFWS + r'?' + r'"(?:' + FWS + \
                r'?' + QCONTENT + r')*' + FWS + \
                r'?' + r'"' + CFWS + r'?'
LOCAL_PART = r'(?:' + DOT_ATOM + r'|' + \
             QUOTED_STRING + r')'                # see 3.4.1. Addr-spec specification
DTEXT = r'[' + NO_WS_CTL + r'\x21-\x5a\x5e-\x7e]'      # see 3.4.1
DCONTENT = r'(?:' + DTEXT + r'|' + \
           QUOTED_PAIR + r')'                    # see 3.4.1
DOMAIN_LITERAL = CFWS + r'?' + r'\[' + \
                 r'(?:' + FWS + r'?' + DCONTENT + \
                 r')*' + FWS + r'?\]' + CFWS + r'?'    # see 3.4.1
DOMAIN = r'(?:' + DOT_ATOM + r'|' + \
         DOMAIN_LITERAL + r')'                   # see 3.4.1
ADDR_SPEC = LOCAL_PART + r'@' + DOMAIN           # see 3.4.1

# A valid address will match exactly the 3.4.1 addr-spec.
VALID_ADDRESS_REGEXP = '^' + ADDR_SPEC + '$'

# Module-level caches, shared for the lifetime of the process.
# NOTE(review): MX_DNS_CACHE presumably memoizes MX lookups per hostname;
# its use is not visible here, so confirm against get_mx_ip.
MX_DNS_CACHE = {}
# Written by validate_email as MX_CHECK_CACHE[mx[1]] = True after a
# successful SMTP connect, and read back when verify is off.
MX_CHECK_CACHE = {}
|
||||
|
@ -123,56 +57,57 @@ def validate_email(email, check_mx=False, verify=False, debug=False, smtp_timeou
|
|||
else:
|
||||
logger = None
|
||||
|
||||
try:
|
||||
assert re.match(VALID_ADDRESS_REGEXP, email) is not None
|
||||
check_mx |= verify
|
||||
if check_mx:
|
||||
if not DNS:
|
||||
raise Exception('For check the mx records or check if the email exists you must '
|
||||
'have installed pyDNS python package')
|
||||
hostname = email[email.find('@') + 1:]
|
||||
mx_hosts = get_mx_ip(hostname)
|
||||
if mx_hosts is None:
|
||||
return False
|
||||
for mx in mx_hosts:
|
||||
try:
|
||||
if not verify and mx[1] in MX_CHECK_CACHE:
|
||||
return MX_CHECK_CACHE[mx[1]]
|
||||
smtp = smtplib.SMTP(timeout=smtp_timeout)
|
||||
smtp.connect(mx[1])
|
||||
MX_CHECK_CACHE[mx[1]] = True
|
||||
if not verify:
|
||||
try:
|
||||
if not email_has_valid_structure(email_address):
|
||||
return False
|
||||
|
||||
check_mx |= verify
|
||||
if check_mx:
|
||||
if not DNS:
|
||||
raise Exception('For check the mx records or check if the email exists you must '
|
||||
'have installed pyDNS python package')
|
||||
hostname = email[email.find('@') + 1:]
|
||||
mx_hosts = get_mx_ip(hostname)
|
||||
if mx_hosts is None:
|
||||
return False
|
||||
for mx in mx_hosts:
|
||||
try:
|
||||
if not verify and mx[1] in MX_CHECK_CACHE:
|
||||
return MX_CHECK_CACHE[mx[1]]
|
||||
smtp = smtplib.SMTP(timeout=smtp_timeout)
|
||||
smtp.connect(mx[1])
|
||||
MX_CHECK_CACHE[mx[1]] = True
|
||||
if not verify:
|
||||
try:
|
||||
smtp.quit()
|
||||
except smtplib.SMTPServerDisconnected:
|
||||
pass
|
||||
return True
|
||||
status, _ = smtp.helo()
|
||||
if status != 250:
|
||||
smtp.quit()
|
||||
except smtplib.SMTPServerDisconnected:
|
||||
pass
|
||||
return True
|
||||
status, _ = smtp.helo()
|
||||
if status != 250:
|
||||
smtp.quit()
|
||||
if debug:
|
||||
logger.debug(u'%s answer: %s - %s', mx[1], status, _)
|
||||
continue
|
||||
smtp.mail('')
|
||||
status, _ = smtp.rcpt(email)
|
||||
if status == 250:
|
||||
smtp.quit()
|
||||
return True
|
||||
if debug:
|
||||
logger.debug(u'%s answer: %s - %s', mx[1], status, _)
|
||||
continue
|
||||
smtp.mail('')
|
||||
status, _ = smtp.rcpt(email)
|
||||
if status == 250:
|
||||
smtp.quit()
|
||||
return True
|
||||
if debug:
|
||||
logger.debug(u'%s answer: %s - %s', mx[1], status, _)
|
||||
smtp.quit()
|
||||
except smtplib.SMTPServerDisconnected: # Server not permits verify user
|
||||
if debug:
|
||||
logger.debug(u'%s disconected.', mx[1])
|
||||
except smtplib.SMTPConnectError:
|
||||
if debug:
|
||||
logger.debug(u'Unable to connect to %s.', mx[1])
|
||||
return None
|
||||
except AssertionError:
|
||||
return False
|
||||
except (ServerError, socket.error) as e:
|
||||
if debug:
|
||||
logger.debug('ServerError or socket.error exception raised (%s).', e)
|
||||
except smtplib.SMTPServerDisconnected: # Server not permits verify user
|
||||
if debug:
|
||||
logger.debug(u'%s disconected.', mx[1])
|
||||
except smtplib.SMTPConnectError:
|
||||
if debug:
|
||||
logger.debug(u'Unable to connect to %s.', mx[1])
|
||||
return None
|
||||
except AssertionError:
|
||||
return False
|
||||
except (ServerError, socket.error) as e:
|
||||
if debug:
|
||||
logger.debug('ServerError or socket.error exception raised (%s).', e)
|
||||
return None
|
||||
return True
|
||||
|
||||
|
|
Loading…
Reference in New Issue