More formatting

This commit is contained in:
László Károlyi 2019-03-01 21:49:39 +01:00
parent db8a24b5d4
commit bb352cdc98
Signed by: karolyi
GPG Key ID: 2DCAF25E55735BFE
3 changed files with 44 additions and 52 deletions

View File

@ -7,6 +7,8 @@
# The section of RFC 2822 from which each pattern component is
# derived is given in an accompanying comment.
#
# https://tools.ietf.org/html/rfc2822
#
# (To make things simple, every string below is given as 'raw',
# even when it's not strictly necessary. This way we don't forget
# when it is necessary.)
@ -16,49 +18,40 @@
import re
WSP = r'[\s]' # see 2.2.2. Structured Header Field Bodies
CRLF = r'(?:\r\n)' # see 2.2.3. Long Header Fields
NO_WS_CTL = r'\x01-\x08\x0b\x0c\x0f-\x1f\x7f' # see 3.2.1. Primitive Tokens
QUOTED_PAIR = r'(?:\\.)' # see 3.2.2. Quoted characters
FWS = r'(?:(?:' + WSP + r'*' + CRLF + r')?' + \
WSP + r'+)' # see 3.2.3. Folding white space and comments
CTEXT = r'[' + NO_WS_CTL + \
r'\x21-\x27\x2a-\x5b\x5d-\x7e]' # see 3.2.3
CCONTENT = r'(?:' + CTEXT + r'|' + \
QUOTED_PAIR + r')' # see 3.2.3 (NB: The RFC includes COMMENT here
# Primitive building blocks of the address pattern, taken from RFC 2822.
# see 2.2.2. Structured Header Field Bodies: WSP is SP / HTAB only. `\s`
# would additionally accept bare CR, LF, FF and VT as white space.
WSP = r'[ \t]'
CRLF = r'(?:\r\n)' # see 2.2.3. Long Header Fields
# see 3.2.1. Primitive Tokens: %d1-8 / %d11 / %d12 / %d14-31 / %d127.
# This is only the body of a character class; it is embedded in the
# bracket expressions of the text patterns that use it.
NO_WS_CTL = r'\x01-\x08\x0b\x0c\x0e-\x1f\x7f'
QUOTED_PAIR = r'(?:\\.)' # see 3.2.2. Quoted characters
# see 3.2.3. Folding white space and comments
FWS = rf'(?:(?:{WSP}*{CRLF})?{WSP}+)'
CTEXT = rf'[{NO_WS_CTL}\x21-\x27\x2a-\x5b\x5d-\x7e]' # see 3.2.3
# see 3.2.3 (NB: The RFC includes COMMENT here
CCONTENT = rf'(?:{CTEXT}|{QUOTED_PAIR})'
# as well, but that would be circular.)
COMMENT = r'\((?:' + FWS + r'?' + CCONTENT + \
r')*' + FWS + r'?\)' # see 3.2.3
CFWS = r'(?:' + FWS + r'?' + COMMENT + ')*(?:' + \
FWS + '?' + COMMENT + '|' + FWS + ')' # see 3.2.3
ATEXT = r'[\w!#$%&\'\*\+\-/=\?\^`\{\|\}~]' # see 3.2.4. Atom
ATOM = CFWS + r'?' + ATEXT + r'+' + CFWS + r'?' # see 3.2.4
DOT_ATOM_TEXT = ATEXT + r'+(?:\.' + ATEXT + r'+)*' # see 3.2.4
DOT_ATOM = CFWS + r'?' + DOT_ATOM_TEXT + CFWS + r'?' # see 3.2.4
QTEXT = r'[' + NO_WS_CTL + \
r'\x21\x23-\x5b\x5d-\x7e]' # see 3.2.5. Quoted strings
QCONTENT = r'(?:' + QTEXT + r'|' + \
QUOTED_PAIR + r')' # see 3.2.5
QUOTED_STRING = CFWS + r'?' + r'"(?:' + FWS + \
r'?' + QCONTENT + r')*' + FWS + \
r'?' + r'"' + CFWS + r'?'
LOCAL_PART = r'(?:' + DOT_ATOM + r'|' + \
QUOTED_STRING + r')' # see 3.4.1. Addr-spec specification
DTEXT = r'[' + NO_WS_CTL + r'\x21-\x5a\x5e-\x7e]' # see 3.4.1
DCONTENT = r'(?:' + DTEXT + r'|' + \
QUOTED_PAIR + r')' # see 3.4.1
DOMAIN_LITERAL = CFWS + r'?' + r'\[' + \
r'(?:' + FWS + r'?' + DCONTENT + \
r')*' + FWS + r'?\]' + CFWS + r'?' # see 3.4.1
DOMAIN = r'(?:' + DOT_ATOM + r'|' + \
DOMAIN_LITERAL + r')' # see 3.4.1
ADDR_SPEC = LOCAL_PART + r'@' + DOMAIN # see 3.4.1
VALID_ADDRESS_REGEXP = '^' + ADDR_SPEC + '$'
# Composite patterns, each assembled from the pieces defined before it.
# A parenthesised comment: CCONTENT items, each optionally preceded by
# folding white space. CCONTENT leaves COMMENT out, so a nested comment is
# not matched by this pattern.
COMMENT = rf'\((?:{FWS}?{CCONTENT})*{FWS}?\)' # see 3.2.3
# Any number of comments followed by a final comment or a run of FWS.
CFWS = rf'(?:{FWS}?{COMMENT})*(?:{FWS}?{COMMENT}|{FWS})' # see 3.2.3
# `\w` supplies the alphanumerics and '_'; the rest are the listed specials.
ATEXT = r'[\w!#$%&\'\*\+\-/=\?\^`\{\|\}~]' # see 3.2.4. Atom
ATOM = rf'{CFWS}?{ATEXT}+{CFWS}?' # see 3.2.4
# One or more ATEXT runs separated by single dots (no leading, trailing or
# doubled dot).
DOT_ATOM_TEXT = rf'{ATEXT}+(?:\.{ATEXT}+)*' # see 3.2.4
DOT_ATOM = rf'{CFWS}?{DOT_ATOM_TEXT}{CFWS}?' # see 3.2.4
QTEXT = rf'[{NO_WS_CTL}\x21\x23-\x5b\x5d-\x7e]' # see 3.2.5. Quoted strings
QCONTENT = rf'(?:{QTEXT}|{QUOTED_PAIR})' # see 3.2.5
# see 3.2.5: double-quoted run of QCONTENT, optionally wrapped in CFWS
QUOTED_STRING = rf'{CFWS}?"(?:{FWS}?{QCONTENT})*{FWS}?"{CFWS}?'
# see 3.4.1. Addr-spec specification
LOCAL_PART = rf'(?:{DOT_ATOM}|{QUOTED_STRING})'
DTEXT = rf'[{NO_WS_CTL}\x21-\x5a\x5e-\x7e]' # see 3.4.1
DCONTENT = rf'(?:{DTEXT}|{QUOTED_PAIR})' # see 3.4.1
# Bracketed domain literal, e.g. the `[...]` form after the '@'.
DOMAIN_LITERAL = rf'{CFWS}?\[(?:{FWS}?{DCONTENT})*{FWS}?\]{CFWS}?' # see 3.4.1
DOMAIN = rf'(?:{DOT_ATOM}|{DOMAIN_LITERAL})' # see 3.4.1
ADDR_SPEC = rf'{LOCAL_PART}@{DOMAIN}' # see 3.4.1
# ADDR_SPEC anchored at both ends so the whole input has to be an address.
# NOTE(review): '$' also matches just before a trailing newline; r'\Z' would
# anchor strictly at the end of the string -- confirm which is intended.
VALID_ADDRESS_REGEXP = rf'^{ADDR_SPEC}$'
# Compiled once at import time. re.DOTALL lets the '.' in QUOTED_PAIR match
# a newline as well.
_matcher = re.compile(pattern=VALID_ADDRESS_REGEXP, flags=re.DOTALL)
# Return True when email_address contains only code points <= 127 and
# matches VALID_ADDRESS_REGEXP from its first character; otherwise False.
def regex_check(email_address):
# Reject any character above 127 before the pattern is tried.
if any(ord(char) > 127 for char in email_address):
return False
# The next two lines are the two sides of this diff (the page carries no
# +/- markers): presumably the first is the replaced check against the
# pattern string, the second its replacement using the precompiled _matcher.
if re.match(VALID_ADDRESS_REGEXP, email_address):
if _matcher.match(string=email_address):
return True
return False

View File

@ -1,13 +1,12 @@
from pyemailval.regex_check import _get_domain_from_email_address
from pyemailval.mx_check import _get_domain_from_email_address
DOMAINS = {
"email@domain.com": "domain.com",
"email@subdomain.domain.com": "subdomain.domain.com",
"email@123.123.123.123": "123.123.123.123",
"email@[123.123.123.123]": "123.123.123.123",
"email@domain-one.com": "domain-one.com",
"email@domain.co.jp": "domain.co.jp",
'email@domain.com': 'domain.com',
'email@subdomain.domain.com': 'subdomain.domain.com',
'email@123.123.123.123': '123.123.123.123',
'email@[123.123.123.123]': '123.123.123.123',
'email@domain-one.com': 'domain-one.com',
'email@domain.co.jp': 'domain.co.jp',
}
@ -19,5 +18,6 @@ def test_domain_from_email_address():
assert domain_from_function == domain
except AssertionError:
raise AssertionError(
"Email address {} should result in domain {} but resulted in domain {}"
.format(email_address, domain, domain_from_function))
'Email address {} should result in domain {} but resulted in '
'domain {}'.format(
email_address, domain, domain_from_function))

View File

@ -16,7 +16,7 @@ VALID_EMAIL_ADDRESS_EXAMPLES = [
'firstname-lastname@domain.com' # dash in address field
]
INVALID_EMAIL_ADDRESS_EXAMPLES = [
INVALID_EXAMPLES = [
'plainaddress', # missing @ sign and domain
'#@%^%#$@#$@#.com', # garbage
'@domain.com', # missing username
@ -47,11 +47,10 @@ def test_valid_email_structure_regex():
def test_invalid_email_structure_regex():
for index, invalid_email_address in \
enumerate(INVALID_EMAIL_ADDRESS_EXAMPLES):
for idx, invalid_email_address in enumerate(INVALID_EXAMPLES):
try:
assert regex_check(invalid_email_address) is False
except AssertionError:
raise AssertionError(
'{} should be invalid ({}th email address in the list)'
.format(invalid_email_address, index))
.format(invalid_email_address, idx))