From 9d662a2f2356c0924ce71bc57e023023fd66d83b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reinhard=20M=C3=BCller?= Date: Mon, 22 Feb 2021 14:32:56 +0100 Subject: [PATCH 01/27] Improve SMTP check error handling * Improve exception class hierarchy * Raise exception on malformatted "from_address" parameter instead of yielding a negative verification result * Add option to raise exceptions on ambiguous results * Improve exception parameters to allow for a more structured analysis of the negative response * Yield a negative verification result upon the first permanent error on RCPT TO, just like a mail server would also bounce after the first permanent error --- README.rst | 8 +- validate_email/exceptions.py | 108 ++++++++++++++++++----- validate_email/mx_check.py | 142 ++++++++++++++++++------------- validate_email/validate_email.py | 7 +- 4 files changed, 182 insertions(+), 83 deletions(-) diff --git a/README.rst b/README.rst index baeddfc..95e6913 100644 --- a/README.rst +++ b/README.rst @@ -31,9 +31,9 @@ Basic usage:: :code:`check_mx`: check the mx-records and check whether the email actually exists -:code:`from_address`: the email address the probe will be sent from, +:code:`from_address`: the email address the probe will be sent from -:code:`helo_host`: the host to use in SMTP HELO when checking for an email, +:code:`helo_host`: the host to use in SMTP HELO when checking for an email :code:`smtp_timeout`: seconds until SMTP timeout @@ -45,6 +45,10 @@ Basic usage:: :code:`skip_smtp`: (default :code:`False`) skip the SMTP conversation with the server, after MX checks. Will automatically be set to :code:`True` when :code:`check_mx` is :code:`False`! +:code:`raise_communication_errors`: Affects the SMTP verification step. If set to :code:`True`, any connection error or SMTP error message from the server will lead to a negative verification result, otherwise it will be regarded as an ambiguous result. Defaults to :code:`False`. 
This option is mainly used in connection with :code:`validate_email_or_fail()`, where the exception raised can be analyzed to find out the reason for the otherwise ambiguous result. + +:code:`raise_temporary_errors`: Affects the SMTP verification step. If set to :code:`True`, a temporary error reply of the SMTP server to the :code:`RCPT TO` command (as used, for example, with greylisting) will lead to a negative verification result, otherwise it will be regarded as an ambiguous result. Defaults to :code:`False`. This option is mainly used in connection with :code:`validate_email_or_fail()`, where the exception raised can be analyzed to find out the reason for the otherwise ambiguous result. + The function :code:`validate_email_or_fail()` works exactly like :code:`validate_email`, except that it raises an exception in the case of validation failure instead of returning :code:`False`. The module will try to negotiate a TLS connection with STARTTLS, and silently fall back to an unencrypted SMTP connection if the server doesn't support it. diff --git a/validate_email/exceptions.py b/validate_email/exceptions.py index d08a714..cbffee8 100644 --- a/validate_email/exceptions.py +++ b/validate_email/exceptions.py @@ -1,20 +1,22 @@ -from typing import Iterable +from typing import Dict, Tuple -class EmailValidationError(Exception): - 'Base class for all exceptions indicating validation failure.' +class Error(Exception): + 'Base class for all exceptions of this module.' message = 'Unknown error.' def __str__(self): return self.message -class AddressFormatError(EmailValidationError): - 'Raised when the email address has an invalid format.' - message = 'Invalid email address.' +class ParameterError(Error): + """ + Base class for all exceptions indicating a wrong function parameter. + """ + pass -class FromAddressFormatError(EmailValidationError): +class FromAddressFormatError(ParameterError): """ Raised when the from email address used for the MX check has an invalid format. 
@@ -22,6 +24,16 @@ class FromAddressFormatError(EmailValidationError): message = 'Invalid "From:" email address.' +class EmailValidationError(Error): + 'Base class for all exceptions indicating validation failure.' + pass + + +class AddressFormatError(EmailValidationError): + 'Raised when the email address has an invalid format.' + message = 'Invalid email address.' + + class DomainBlacklistedError(EmailValidationError): """ Raised when the domain of the email address is blacklisted on @@ -30,43 +42,101 @@ class DomainBlacklistedError(EmailValidationError): message = 'Domain blacklisted.' -class DomainNotFoundError(EmailValidationError): +class MXError(EmailValidationError): + """ + Base class of all exceptions that indicate failure to determine a + valid MX for the domain of email address. + """ + pass + + +class DomainNotFoundError(MXError): 'Raised when the domain is not found.' message = 'Domain not found.' -class NoNameserverError(EmailValidationError): +class NoNameserverError(MXError): 'Raised when the domain does not resolve by nameservers in time.' message = 'No nameserver found for domain.' -class DNSTimeoutError(EmailValidationError): +class DNSTimeoutError(MXError): 'Raised when the domain lookup times out.' message = 'Domain lookup timed out.' -class DNSConfigurationError(EmailValidationError): +class DNSConfigurationError(MXError): """ Raised when the DNS entries for this domain are falsely configured. """ message = 'Misconfigurated DNS entries for domain.' -class NoMXError(EmailValidationError): - 'Raised then the domain has no MX records configured.' +class NoMXError(MXError): + 'Raised when the domain has no MX records configured.' message = 'No MX record for domain found.' -class NoValidMXError(EmailValidationError): +class NoValidMXError(MXError): + """ + Raised when the domain has MX records configured, but none of them + has a valid format. + """ message = 'No valid MX record for domain found.' 
-class AddressNotDeliverableError(EmailValidationError): - 'Raised when a non-ambigious resulted lookup fails.' - message = 'Email address undeliverable:' +class SMTPError(EmailValidationError): + """ + Base class for exceptions raised from unsuccessful SMTP + communication. - def __init__(self, error_messages: Iterable): + `error_messages` is a dictionary with an entry per MX record, where + the hostname is the key and a tuple of command, error code, and + error message is the value. + """ + def __init__(self, error_messages: Dict[str, Tuple[str, int, str]]): self.error_messages = error_messages def __str__(self) -> str: - return '\n'.join([self.message] + self.error_messages) + return '\n'.join( + [self.message] + + [f'{k}: {v[1]} {v[2]} (in reply to {v[0]})' + for k, v in self.error_messages.items()] + ) + + +class AddressNotDeliverableError(SMTPError): + """ + Raised when at least one of the MX sends an SMTP reply which + unambiguously indicate an invalid (nonexistant, blocked, expired...) + recipient email address. + + This exception indicates that the email address is clearly invalid. + """ + message = 'Email address undeliverable:' + + +class SMTPCommunicationError(SMTPError): + """ + Raised when the SMTP communication with all MX was unsuccessful for + other reasons than an invalid recipient email address. + + This exception indicates a configuration issue either on the host + where this program runs or on the MX. A possible reason is that the + local host ist blacklisted on the MX. + """ + message = 'SMTP communication failure:' + + +class SMTPTemporaryError(SMTPError): + """ + Raised when the email address cannot be verified because none of the + MX gave a clear "yes" or "no" about the existence of the address, + but at least one gave a temporary error reply to the "RCPT TO:" + command. 
+ + This exception indicates that the validity of the email address + cannot be verified, either for reasons of MX configuration (like + greylisting) or due to temporary server issues on the MX. + """ + message = 'Temporary error in email address verification:' diff --git a/validate_email/mx_check.py b/validate_email/mx_check.py index d41e235..e906fe2 100644 --- a/validate_email/mx_check.py +++ b/validate_email/mx_check.py @@ -2,7 +2,7 @@ from logging import getLogger from smtplib import SMTP, SMTPNotSupportedError, SMTPServerDisconnected from socket import error as SocketError from socket import gethostname -from typing import Optional +from typing import Optional, Tuple from dns.exception import Timeout from dns.rdatatype import MX as rdtype_mx @@ -14,7 +14,8 @@ from .constants import HOST_REGEX from .email_address import EmailAddress from .exceptions import ( AddressNotDeliverableError, DNSConfigurationError, DNSTimeoutError, - DomainNotFoundError, NoMXError, NoNameserverError, NoValidMXError) + DomainNotFoundError, NoMXError, NoNameserverError, NoValidMXError, + SMTPCommunicationError, SMTPTemporaryError) LOGGER = getLogger(name=__name__) @@ -78,7 +79,7 @@ def _smtp_ehlo_tls(smtp: SMTP, helo_host: str): unavailable. """ code, message = smtp.ehlo(name=helo_host) - if code >= 300: + if code >= 400: # EHLO bails out, no further SMTP commands are acceptable raise _ProtocolError('EHLO', code, message) try: @@ -95,19 +96,21 @@ def _smtp_ehlo_tls(smtp: SMTP, helo_host: str): def _smtp_mail(smtp: SMTP, from_address: EmailAddress): 'Send and evaluate the `MAIL FROM` command.' 
code, message = smtp.mail(sender=from_address.ace) - if code >= 300: + if code >= 400: # MAIL FROM bails out, no further SMTP commands are acceptable raise _ProtocolError('MAIL FROM', code, message) def _smtp_converse( mx_record: str, smtp_timeout: int, debug: bool, helo_host: str, - from_address: EmailAddress, email_address: EmailAddress): + from_address: EmailAddress, email_address: EmailAddress + ) -> Tuple[int, str]: """ - Do the `SMTP` conversation, handle errors in the caller. + Do the `SMTP` conversation with one MX, and return code and message + of the reply to the `RCPT TO:` command. - Raise `_ProtocolError` on error, and `StopIteration` if the - conversation points out an existing email. + If the conversation fails before the `RCPT TO:` command can be + issued, a `_ProtocolError` is raised. """ if debug: LOGGER.debug(msg=f'Trying {mx_record} ...') @@ -119,70 +122,87 @@ def _smtp_converse( raise _ProtocolError('connect', code, message) _smtp_ehlo_tls(smtp=smtp, helo_host=helo_host) _smtp_mail(smtp=smtp, from_address=from_address) - code, message = smtp.rcpt(recip=email_address.ace) - if code == 250: - # Address valid, early exit - raise StopIteration - elif code >= 500: - raise _ProtocolError('RCPT TO', code, message) - - -def _check_one_mx( - error_messages: list, mx_record: str, helo_host: str, - from_address: EmailAddress, email_address: EmailAddress, - smtp_timeout: int, debug: bool) -> bool: - """ - Check one MX server, return the `is_ambigious` boolean or raise - `StopIteration` if this MX accepts the email. 
- """ - try: - _smtp_converse( - mx_record=mx_record, smtp_timeout=smtp_timeout, debug=debug, - helo_host=helo_host, from_address=from_address, - email_address=email_address) - except SMTPServerDisconnected: - return True - except (SocketError, _ProtocolError) as error: - error_messages.append(f'{mx_record}: {error}') - return False - return True + return smtp.rcpt(recip=email_address.ace) def _check_mx_records( mx_records: list, smtp_timeout: int, helo_host: str, from_address: EmailAddress, email_address: EmailAddress, - debug: bool) -> Optional[bool]: + debug: bool, raise_communication_errors: bool, + raise_temporary_errors: bool) -> Optional[bool]: 'Check the mx records for a given email address.' - # TODO: Raise an ambigious exception, containing the messages? Will - # be a breaking change. - error_messages = [] - found_ambigious = False + communication_errors = {} + temporary_errors = {} for mx_record in mx_records: try: - found_ambigious |= _check_one_mx( - error_messages=error_messages, mx_record=mx_record, + code, message = _smtp_converse( + mx_record=mx_record, smtp_timeout=smtp_timeout, debug=debug, helo_host=helo_host, from_address=from_address, - email_address=email_address, smtp_timeout=smtp_timeout, - debug=debug) - except StopIteration: - # Address valid, early exit - return True - # If any of the mx servers behaved ambigious, return None, otherwise raise - # an exception containing the collected error messages. - if not found_ambigious: - raise AddressNotDeliverableError(error_messages=error_messages) + email_address=email_address) + if code >= 500: + # Address clearly invalid: exit early. + raise AddressNotDeliverableError({mx_record: ( + 'RCPT TO', code, message.decode(errors='ignore'))}) + elif code >= 400: + # Temporary error on this MX: collect message and continue. + temporary_errors[mx_record] = ( + 'RCPT TO', code, message.decode(errors='ignore')) + else: + # Address clearly valid: exit early. 
+ return True + except (SocketError, SMTPServerDisconnected) as error: + # Connection problem: collect message and continue. + communication_errors[mx_record] = ('connect', 0, error) + except _ProtocolError as error: + # SMTP communication error: collect message and continue. + communication_errors[mx_record] = ( + error.command, error.code, error.message) + # Raise exceptions on ambiguous results if desired. If in doubt, raise the + # CommunicationError because that one might point to local configuration or + # blacklisting issues. + if communication_errors and raise_communication_errors: + raise SMTPCommunicationError(communication_errors) + if temporary_errors and raise_temporary_errors: + raise SMTPTemporaryError(temporary_errors) + # Can't verify whether or not email address exists. + return None def mx_check( - email_address: EmailAddress, debug: bool, - from_address: Optional[EmailAddress] = None, - helo_host: Optional[str] = None, smtp_timeout: int = 10, - dns_timeout: int = 10, skip_smtp: bool = False -) -> Optional[bool]: + email_address: EmailAddress, debug: bool, + from_address: Optional[EmailAddress] = None, + helo_host: Optional[str] = None, smtp_timeout: int = 10, + dns_timeout: int = 10, skip_smtp: bool = False, + raise_communication_errors: bool = False, + raise_temporary_errors: bool = False + ) -> Optional[bool]: """ - Return `True` if the host responds with a deliverable response code, - `False` if not-deliverable. Also, return `None` if there if couldn't - provide a conclusive result (e.g. temporary errors or graylisting). + Verify the given email address by determining the SMTP servers + responsible for the domain and then asking them to deliver an + email to the address. Before the actual message is sent, the + process is interrupted. + + Returns `True` as soon as the any server accepts the recipient + address. + + Raises a `AddressNotDeliverableError` if any server unambiguously + and permanently refuses to accept the recipient address. 
+ + If the server answers with a temporary error code, the validity of + the email address can not be determined. In that case, the function + returns `None`, or an `SMTPTemporaryError` is raised, dependent on + the value of `raise_temporary_errors`. Greylisting is a frequent + cause of this. + + If the SMTP server(s) reply with an error message to any of the + communication steps before the recipient address is checked, the + validity of the email address can not be determined either. In that + case, the function returns `None`, or an `SMTPCommunicationError` is + raised, dependent on the value of `raise_communication_errors`. + + In case no responsible SMTP servers can be determined, a variety of + exceptions is raised depending on the exact issue, all derived from + `MXError`. """ host = helo_host or gethostname() from_address = from_address or email_address @@ -195,4 +215,6 @@ def mx_check( return True return _check_mx_records( mx_records=mx_records, smtp_timeout=smtp_timeout, helo_host=host, - from_address=from_address, email_address=email_address, debug=debug) + from_address=from_address, email_address=email_address, debug=debug, + raise_communication_errors=raise_communication_errors, + raise_temporary_errors=raise_temporary_errors) diff --git a/validate_email/validate_email.py b/validate_email/validate_email.py index 769aa50..ab08926 100644 --- a/validate_email/validate_email.py +++ b/validate_email/validate_email.py @@ -16,7 +16,8 @@ def validate_email_or_fail( from_address: Optional[str] = None, helo_host: Optional[str] = None, smtp_timeout: int = 10, dns_timeout: int = 10, use_blacklist: bool = True, debug: bool = False, - skip_smtp: bool = False) -> Optional[bool]: + skip_smtp: bool = False, raise_communication_errors: bool = False, + raise_temporary_errors: bool = False) -> Optional[bool]: """ Return `True` if the email address validation is successful, `None` if the validation result is ambigious, and raise an exception if the validation @@ -38,7 
+39,9 @@ def validate_email_or_fail( return mx_check( email_address=email_address, from_address=from_address, helo_host=helo_host, smtp_timeout=smtp_timeout, - dns_timeout=dns_timeout, skip_smtp=skip_smtp, debug=debug) + dns_timeout=dns_timeout, skip_smtp=skip_smtp, debug=debug, + raise_communication_errors=raise_communication_errors, + raise_temporary_errors=raise_temporary_errors) def validate_email(email_address: str, *args, **kwargs): From 59172783ce8bcdd4e6a880d720995b0746e3f7fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reinhard=20M=C3=BCller?= Date: Mon, 22 Feb 2021 15:18:49 +0100 Subject: [PATCH 02/27] Fix indentation --- validate_email/mx_check.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/validate_email/mx_check.py b/validate_email/mx_check.py index e906fe2..eeb68a1 100644 --- a/validate_email/mx_check.py +++ b/validate_email/mx_check.py @@ -104,7 +104,7 @@ def _smtp_mail(smtp: SMTP, from_address: EmailAddress): def _smtp_converse( mx_record: str, smtp_timeout: int, debug: bool, helo_host: str, from_address: EmailAddress, email_address: EmailAddress - ) -> Tuple[int, str]: + ) -> Tuple[int, str]: """ Do the `SMTP` conversation with one MX, and return code and message of the reply to the `RCPT TO:` command. 
@@ -175,7 +175,7 @@ def mx_check( dns_timeout: int = 10, skip_smtp: bool = False, raise_communication_errors: bool = False, raise_temporary_errors: bool = False - ) -> Optional[bool]: + ) -> Optional[bool]: """ Verify the given email address by determining the SMTP servers responsible for the domain and then asking them to deliver an From ef15fa994ab59364475dfb9bbef936863e295238 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reinhard=20M=C3=BCller?= Date: Tue, 23 Feb 2021 21:28:13 +0100 Subject: [PATCH 03/27] Restructure SMTP check code --- validate_email/mx_check.py | 280 ++++++++++++++++++++++--------------- 1 file changed, 168 insertions(+), 112 deletions(-) diff --git a/validate_email/mx_check.py b/validate_email/mx_check.py index eeb68a1..f70bd2f 100644 --- a/validate_email/mx_check.py +++ b/validate_email/mx_check.py @@ -1,8 +1,7 @@ from logging import getLogger -from smtplib import SMTP, SMTPNotSupportedError, SMTPServerDisconnected -from socket import error as SocketError -from socket import gethostname -from typing import Optional, Tuple +from smtplib import ( + SMTP, SMTPNotSupportedError, SMTPResponseException, SMTPServerDisconnected) +from typing import List, Optional from dns.exception import Timeout from dns.rdatatype import MX as rdtype_mx @@ -20,21 +19,6 @@ from .exceptions import ( LOGGER = getLogger(name=__name__) -class _ProtocolError(Exception): - """ - Raised when there is an error during the SMTP conversation. - Used only internally. - """ - - def __init__(self, command: str, code: int, message: bytes): - self.command = command - self.code = code - self.message = message.decode(errors='ignore') - - def __str__(self): - return f'{self.code} {self.message} (in reply to {self.command})' - - def _get_mx_records(domain: str, timeout: int) -> list: 'Return the DNS response for checking, optionally raise exceptions.' 
try: @@ -73,99 +57,171 @@ def _get_cleaned_mx_records(domain: str, timeout: int) -> list: return result -def _smtp_ehlo_tls(smtp: SMTP, helo_host: str): +class _SMTPChecker(SMTP): """ - Try and start the TLS session, fall back to unencrypted when - unavailable. + A specialized variant of `smtplib.SMTP` for checking the validity of + email addresses. + + All the commands used in the check process are modified to raise + appropriate exceptions: `SMTPServerDisconnected` on connection + issues and `SMTPResponseException` on negative SMTP server + responses. Note that the methods of `smtplib.SMTP` already raise + these exceptions on some conditions. + + Also, a new method `check` is added to run the check for a given + list of SMTP servers. """ - code, message = smtp.ehlo(name=helo_host) - if code >= 400: - # EHLO bails out, no further SMTP commands are acceptable - raise _ProtocolError('EHLO', code, message) - try: - smtp.starttls() - code, message = smtp.ehlo(name=helo_host) - except SMTPNotSupportedError: - # The server does not support the STARTTLS extension - pass - except RuntimeError: - # SSL/TLS support is not available to your Python interpreter - pass + def __init__( + self, local_hostname: str, timeout: float, debug: bool, + raise_communication_errors: bool, + raise_temporary_errors: bool, + sender: str, recip: str): + """ + Initialize the object with all the parameters which remain + constant during the check of one email address on all the SMTP + servers. 
+ """ + super().__init__(local_hostname=local_hostname, timeout=timeout) + self.set_debuglevel(debuglevel=2 if debug else False) + self.__raise_communication_errors = raise_communication_errors + self.__raise_temporary_errors = raise_temporary_errors + self.__sender = sender + self.__recip = recip + self.__communication_errors = {} + self.__temporary_errors = {} + # Avoid error on close() after unsuccessful connect + self.sock = None + def putcmd(self, cmd, args=""): + """ + Like `smtplib.SMTP.putcmd`, but remember the command for later + use in error messages. + """ + if args: + self.__command = f'{cmd} {args}' + else: + self.__command = cmd + super().putcmd(cmd, args) -def _smtp_mail(smtp: SMTP, from_address: EmailAddress): - 'Send and evaluate the `MAIL FROM` command.' - code, message = smtp.mail(sender=from_address.ace) - if code >= 400: - # MAIL FROM bails out, no further SMTP commands are acceptable - raise _ProtocolError('MAIL FROM', code, message) - - -def _smtp_converse( - mx_record: str, smtp_timeout: int, debug: bool, helo_host: str, - from_address: EmailAddress, email_address: EmailAddress - ) -> Tuple[int, str]: - """ - Do the `SMTP` conversation with one MX, and return code and message - of the reply to the `RCPT TO:` command. - - If the conversation fails before the `RCPT TO:` command can be - issued, a `_ProtocolError` is raised. 
- """ - if debug: - LOGGER.debug(msg=f'Trying {mx_record} ...') - with SMTP(timeout=smtp_timeout) as smtp: - smtp._host = mx_record # Workaround for bug in smtplib - smtp.set_debuglevel(debuglevel=2 if debug else False) - code, message = smtp.connect(host=mx_record) - if code >= 400: - raise _ProtocolError('connect', code, message) - _smtp_ehlo_tls(smtp=smtp, helo_host=helo_host) - _smtp_mail(smtp=smtp, from_address=from_address) - return smtp.rcpt(recip=email_address.ace) - - -def _check_mx_records( - mx_records: list, smtp_timeout: int, helo_host: str, - from_address: EmailAddress, email_address: EmailAddress, - debug: bool, raise_communication_errors: bool, - raise_temporary_errors: bool) -> Optional[bool]: - 'Check the mx records for a given email address.' - communication_errors = {} - temporary_errors = {} - for mx_record in mx_records: + def connect(self, host, *args, **kwargs): + """ + Like `smtplib.SMTP.connect`, but raise appropriate exceptions on + connection failure or negative SMTP server response. + """ + self.__command = 'connect' # Used for error messages. + self._host = host # Missing in standard smtplib! try: - code, message = _smtp_converse( - mx_record=mx_record, smtp_timeout=smtp_timeout, debug=debug, - helo_host=helo_host, from_address=from_address, - email_address=email_address) - if code >= 500: - # Address clearly invalid: exit early. - raise AddressNotDeliverableError({mx_record: ( - 'RCPT TO', code, message.decode(errors='ignore'))}) - elif code >= 400: - # Temporary error on this MX: collect message and continue. - temporary_errors[mx_record] = ( - 'RCPT TO', code, message.decode(errors='ignore')) - else: - # Address clearly valid: exit early. 
+ code, message = super().connect(host, *args, **kwargs) + except OSError as error: + raise SMTPServerDisconnected(str(error)) + if code >= 400: + raise SMTPResponseException(code, message) + return code, message + + def starttls(self, *args, **kwargs): + """ + Like `smtplib.SMTP.starttls`, but continue without TLS in case + either end of the connection does not support it. + """ + try: + super().starttls(*args, **kwargs) + except SMTPNotSupportedError: + # The server does not support the STARTTLS extension + pass + except RuntimeError: + # SSL/TLS support is not available to your Python interpreter + pass + + def mail(self, *args, **kwargs): + """ + Like `smtplib.SMTP.mail`, but raise an appropriate exception on + negative SMTP server response. + """ + code, message = super().mail(*args, **kwargs) + if code >= 400: + raise SMTPResponseException(code, message) + return code, message + + def rcpt(self, *args, **kwargs): + """ + Like `smtplib.SMTP.rcpt`, but handle negative SMTP server + responses directly. + """ + code, message = super().rcpt(*args, **kwargs) + if code >= 500: + # Address clearly invalid: issue negative result + raise AddressNotDeliverableError({self._host: ( + 'RCPT TO', code, message.decode(errors='ignore'))}) + elif code >= 400: + # Temporary error on this host: collect message + self.__temporary_errors[self._host] = ( + 'RCPT TO', code, message.decode(errors='ignore')) + return code, message + + def quit(self): + """ + Like `smtplib.SMTP.quit`, but make sure that everything is + cleaned up properly even if the connection has been lost before. + """ + try: + return super().quit() + except SMTPServerDisconnected: + self.ehlo_resp = self.helo_resp = None + self.esmtp_features = {} + self.does_esmtp = False + self.close() + + def _check_one(self, host: str) -> bool: + """ + Run the check for one SMTP server. On positive result, return + `True`. On negative result, raise `AddressNotDeliverableError`. 
+ On ambiguous result (4xx response to `RCPT TO`) or any + communication issue before even reaching `RCPT TO` in the + protocol, collect error message for later use and return + `False`. + """ + try: + self.connect(host) + self.starttls() + self.ehlo_or_helo_if_needed() + self.mail(self.__sender) + code, message = self.rcpt(self.__recip) + except SMTPServerDisconnected as e: + self.__communication_errors[self._host] = ( + self.__command, 0, str(e)) + return False + except SMTPResponseException as e: + self.__communication_errors[self._host] = ( + self.__command, e.smtp_code, + e.smtp_error.decode(errors='ignore')) + return False + finally: + self.quit() + return (code < 400) + + def check(self, hosts: List[str]) -> Optional[bool]: + """ + Run the check for all given SMTP servers. On positive result, + return `True`. On negative result, raise + `AddressNotDeliverableError`. On ambiguous result (4xx + response(s) to `RCPT TO`) or any communication issue(s) before + even reaching `RCPT TO` in the protocol, either raise an + exception or return `None` depending on the parameters. + """ + for host in hosts: + if self.debuglevel > 0: + LOGGER.debug(msg=f'Trying {host} ...') + if self._check_one(host): return True - except (SocketError, SMTPServerDisconnected) as error: - # Connection problem: collect message and continue. - communication_errors[mx_record] = ('connect', 0, error) - except _ProtocolError as error: - # SMTP communication error: collect message and continue. - communication_errors[mx_record] = ( - error.command, error.code, error.message) - # Raise exceptions on ambiguous results if desired. If in doubt, raise the - # CommunicationError because that one might point to local configuration or - # blacklisting issues. 
- if communication_errors and raise_communication_errors: - raise SMTPCommunicationError(communication_errors) - if temporary_errors and raise_temporary_errors: - raise SMTPTemporaryError(temporary_errors) - # Can't verify whether or not email address exists. - return None + # Raise exceptions on ambiguous results if desired. If in doubt, raise + # the CommunicationError because that one might point to local + # configuration or blacklisting issues. + if self.__communication_errors and self.__raise_communication_errors: + raise SMTPCommunicationError(self.__communication_errors) + if self.__temporary_errors and self.__raise_temporary_errors: + raise SMTPTemporaryError(self.__temporary_errors) + # Can't verify whether or not email address exists. + return None def mx_check( @@ -204,7 +260,6 @@ def mx_check( exceptions is raised depending on the exact issue, all derived from `MXError`. """ - host = helo_host or gethostname() from_address = from_address or email_address if email_address.domain_literal_ip: mx_records = [email_address.domain_literal_ip] @@ -213,8 +268,9 @@ def mx_check( domain=email_address.domain, timeout=dns_timeout) if skip_smtp: return True - return _check_mx_records( - mx_records=mx_records, smtp_timeout=smtp_timeout, helo_host=host, - from_address=from_address, email_address=email_address, debug=debug, - raise_communication_errors=raise_communication_errors, - raise_temporary_errors=raise_temporary_errors) + smtp_checker = _SMTPChecker( + local_hostname=helo_host, timeout=smtp_timeout, debug=debug, + raise_communication_errors=raise_communication_errors, + raise_temporary_errors=raise_temporary_errors, + sender=from_address.ace, recip=email_address.ace) + return smtp_checker.check(mx_records) From 903925afc993f5ced23b84c37ab8cb627439a88c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20K=C3=A1rolyi?= Date: Sun, 28 Feb 2021 15:01:37 +0100 Subject: [PATCH 04/27] Code cleanup, tests follow --- README.rst | 6 +- validate_email/exceptions.py 
| 28 ++++--- validate_email/mx_check.py | 134 ++++++++++++++----------------- validate_email/validate_email.py | 33 +++++--- 4 files changed, 101 insertions(+), 100 deletions(-) diff --git a/README.rst b/README.rst index 95e6913..923707e 100644 --- a/README.rst +++ b/README.rst @@ -45,11 +45,7 @@ Basic usage:: :code:`skip_smtp`: (default :code:`False`) skip the SMTP conversation with the server, after MX checks. Will automatically be set to :code:`True` when :code:`check_mx` is :code:`False`! -:code:`raise_communication_errors`: Affects the SMTP verification step. If set to :code:`True`, any connection error or SMTP error message from the server will lead to a negative verification result, otherwise it will be regarded as an ambiguous result. Defaults to :code:`False`. This option is mainly used in connection with :code:`validate_email_or_fail()`, where the exception raised can be analyzed to find out the reason for the otherwise ambiguous result. - -:code:`raise_temporary_errors`: Affects the SMTP verification step. If set to :code:`True`, a temporary error reply of the SMTP server to the :code:`RCPT TO` command (as used, for example, with greylisting) will lead to a negative verification result, otherwise it will be regarded as an ambiguous result. Defaults to :code:`False`. This option is mainly used in connection with :code:`validate_email_or_fail()`, where the exception raised can be analyzed to find out the reason for the otherwise ambiguous result. - -The function :code:`validate_email_or_fail()` works exactly like :code:`validate_email`, except that it raises an exception in the case of validation failure instead of returning :code:`False`. +The function :code:`validate_email_or_fail()` works exactly like :code:`validate_email`, except that it raises an exception in the case of validation failure and ambiguous result instead of returning :code:`False` or :code:`None`, respectively. 
The module will try to negotiate a TLS connection with STARTTLS, and silently fall back to an unencrypted SMTP connection if the server doesn't support it. diff --git a/validate_email/exceptions.py b/validate_email/exceptions.py index cbffee8..9b110ac 100644 --- a/validate_email/exceptions.py +++ b/validate_email/exceptions.py @@ -1,4 +1,8 @@ -from typing import Dict, Tuple +from collections import namedtuple +from typing import Dict + +SMTPMessage = namedtuple( + typename='SmtpErrorMessage', field_names=['command', 'code', 'text']) class Error(Exception): @@ -13,7 +17,6 @@ class ParameterError(Error): """ Base class for all exceptions indicating a wrong function parameter. """ - pass class FromAddressFormatError(ParameterError): @@ -26,7 +29,6 @@ class FromAddressFormatError(ParameterError): class EmailValidationError(Error): 'Base class for all exceptions indicating validation failure.' - pass class AddressFormatError(EmailValidationError): @@ -47,7 +49,6 @@ class MXError(EmailValidationError): Base class of all exceptions that indicate failure to determine a valid MX for the domain of email address. """ - pass class DomainNotFoundError(MXError): @@ -90,19 +91,20 @@ class SMTPError(EmailValidationError): Base class for exceptions raised from unsuccessful SMTP communication. - `error_messages` is a dictionary with an entry per MX record, where - the hostname is the key and a tuple of command, error code, and - error message is the value. + `error_messages` is a dictionary with a `SMTPMessage` per MX record, + where the hostname is the key and a tuple of command, error code, + and error message is the value. 
""" - def __init__(self, error_messages: Dict[str, Tuple[str, int, str]]): + + def __init__(self, error_messages: Dict[str, SMTPMessage]): self.error_messages = error_messages def __str__(self) -> str: - return '\n'.join( - [self.message] + - [f'{k}: {v[1]} {v[2]} (in reply to {v[0]})' - for k, v in self.error_messages.items()] - ) + return '\n'.join([self.message] + [ + f'{host}: {message.code} {message.text} ' + f'(in reply to {message.command})' + for host, message in self.error_messages.items() + ]) class AddressNotDeliverableError(SMTPError): diff --git a/validate_email/mx_check.py b/validate_email/mx_check.py index f70bd2f..677fa8f 100644 --- a/validate_email/mx_check.py +++ b/validate_email/mx_check.py @@ -1,7 +1,7 @@ from logging import getLogger from smtplib import ( SMTP, SMTPNotSupportedError, SMTPResponseException, SMTPServerDisconnected) -from typing import List, Optional +from typing import List, Optional, Tuple from dns.exception import Timeout from dns.rdatatype import MX as rdtype_mx @@ -14,7 +14,7 @@ from .email_address import EmailAddress from .exceptions import ( AddressNotDeliverableError, DNSConfigurationError, DNSTimeoutError, DomainNotFoundError, NoMXError, NoNameserverError, NoValidMXError, - SMTPCommunicationError, SMTPTemporaryError) + SMTPCommunicationError, SMTPMessage, SMTPTemporaryError) LOGGER = getLogger(name=__name__) @@ -71,11 +71,10 @@ class _SMTPChecker(SMTP): Also, a new method `check` is added to run the check for a given list of SMTP servers. 
""" + def __init__( self, local_hostname: str, timeout: float, debug: bool, - raise_communication_errors: bool, - raise_temporary_errors: bool, - sender: str, recip: str): + sender: EmailAddress, recip: EmailAddress): """ Initialize the object with all the parameters which remain constant during the check of one email address on all the SMTP @@ -83,8 +82,6 @@ class _SMTPChecker(SMTP): """ super().__init__(local_hostname=local_hostname, timeout=timeout) self.set_debuglevel(debuglevel=2 if debug else False) - self.__raise_communication_errors = raise_communication_errors - self.__raise_temporary_errors = raise_temporary_errors self.__sender = sender self.__recip = recip self.__communication_errors = {} @@ -92,7 +89,7 @@ class _SMTPChecker(SMTP): # Avoid error on close() after unsuccessful connect self.sock = None - def putcmd(self, cmd, args=""): + def putcmd(self, cmd: str, args: str = ''): """ Like `smtplib.SMTP.putcmd`, but remember the command for later use in error messages. @@ -101,21 +98,25 @@ class _SMTPChecker(SMTP): self.__command = f'{cmd} {args}' else: self.__command = cmd - super().putcmd(cmd, args) + super().putcmd(cmd=cmd, args=args) - def connect(self, host, *args, **kwargs): + def connect( + self, host: str = 'localhost', port: int = 0, + source_address: str = None) -> Tuple[int, str]: """ Like `smtplib.SMTP.connect`, but raise appropriate exceptions on connection failure or negative SMTP server response. + A code > 400 is an error here. """ self.__command = 'connect' # Used for error messages. - self._host = host # Missing in standard smtplib! + self._host = host # Workaround: Missing in standard smtplib! 
try: - code, message = super().connect(host, *args, **kwargs) + code, message = super().connect( + host=host, port=port, source_address=source_address) except OSError as error: raise SMTPServerDisconnected(str(error)) if code >= 400: - raise SMTPResponseException(code, message) + raise SMTPResponseException(code=code, msg=message) return code, message def starttls(self, *args, **kwargs): @@ -132,30 +133,34 @@ class _SMTPChecker(SMTP): # SSL/TLS support is not available to your Python interpreter pass - def mail(self, *args, **kwargs): + def mail(self, sender: str, options: tuple = ()): """ Like `smtplib.SMTP.mail`, but raise an appropriate exception on negative SMTP server response. + A code > 400 is an error here. """ - code, message = super().mail(*args, **kwargs) + code, message = super().mail(sender=sender, options=options) if code >= 400: raise SMTPResponseException(code, message) return code, message - def rcpt(self, *args, **kwargs): + def rcpt(self, recip: str, options: tuple = ()): """ Like `smtplib.SMTP.rcpt`, but handle negative SMTP server responses directly. """ - code, message = super().rcpt(*args, **kwargs) + code, message = super().rcpt(recip=recip, options=options) if code >= 500: # Address clearly invalid: issue negative result - raise AddressNotDeliverableError({self._host: ( - 'RCPT TO', code, message.decode(errors='ignore'))}) + raise AddressNotDeliverableError({ + self._host: SMTPMessage( + command='RCPT TO', code=code, + text=message.decode(errors='ignore'))}) elif code >= 400: # Temporary error on this host: collect message - self.__temporary_errors[self._host] = ( - 'RCPT TO', code, message.decode(errors='ignore')) + self.__temporary_errors[self._host] = SMTPMessage( + command='RCPT TO', code=code, + text=message.decode(errors='ignore')) return code, message def quit(self): @@ -173,55 +178,50 @@ class _SMTPChecker(SMTP): def _check_one(self, host: str) -> bool: """ - Run the check for one SMTP server. On positive result, return - `True`. 
On negative result, raise `AddressNotDeliverableError`. - On ambiguous result (4xx response to `RCPT TO`) or any - communication issue before even reaching `RCPT TO` in the - protocol, collect error message for later use and return - `False`. + Run the check for one SMTP server. + + Return `True` on positive result. + + Return `False` on ambiguous result (4xx response to `RCPT TO`), + while collecting the error message for later use. + + Raise `AddressNotDeliverableError`. on negative result. """ try: - self.connect(host) + self.connect(host=host) self.starttls() self.ehlo_or_helo_if_needed() - self.mail(self.__sender) - code, message = self.rcpt(self.__recip) + self.mail(sender=self.__sender.ace) + code, message = self.rcpt(recip=self.__recip.ace) except SMTPServerDisconnected as e: - self.__communication_errors[self._host] = ( - self.__command, 0, str(e)) + self.__communication_errors[self._host] = SMTPMessage( + command=self.__command, code=0, text=str(e)) return False except SMTPResponseException as e: - self.__communication_errors[self._host] = ( - self.__command, e.smtp_code, - e.smtp_error.decode(errors='ignore')) + self.__communication_errors[self._host] = SMTPMessage( + command=self.__command, code=e.smtp_code, + text=e.smtp_error.decode(errors='ignore')) return False finally: self.quit() - return (code < 400) + return code < 400 def check(self, hosts: List[str]) -> Optional[bool]: """ Run the check for all given SMTP servers. On positive result, - return `True`. On negative result, raise - `AddressNotDeliverableError`. On ambiguous result (4xx - response(s) to `RCPT TO`) or any communication issue(s) before - even reaching `RCPT TO` in the protocol, either raise an - exception or return `None` depending on the parameters. + return `True`, else raise exceptions described in `mx_check`. 
""" for host in hosts: if self.debuglevel > 0: LOGGER.debug(msg=f'Trying {host} ...') - if self._check_one(host): + if self._check_one(host=host): return True - # Raise exceptions on ambiguous results if desired. If in doubt, raise - # the CommunicationError because that one might point to local - # configuration or blacklisting issues. - if self.__communication_errors and self.__raise_communication_errors: + # Raise appropriate exceptions when necessary + if self.__communication_errors: raise SMTPCommunicationError(self.__communication_errors) - if self.__temporary_errors and self.__raise_temporary_errors: + elif self.__temporary_errors: raise SMTPTemporaryError(self.__temporary_errors) - # Can't verify whether or not email address exists. - return None + # Can't verify whether or not email address exists, return None def mx_check( @@ -229,32 +229,22 @@ def mx_check( from_address: Optional[EmailAddress] = None, helo_host: Optional[str] = None, smtp_timeout: int = 10, dns_timeout: int = 10, skip_smtp: bool = False, - raise_communication_errors: bool = False, - raise_temporary_errors: bool = False - ) -> Optional[bool]: +) -> Optional[bool]: """ - Verify the given email address by determining the SMTP servers - responsible for the domain and then asking them to deliver an - email to the address. Before the actual message is sent, the - process is interrupted. - Returns `True` as soon as the any server accepts the recipient address. - Raises a `AddressNotDeliverableError` if any server unambiguously + Raise an `AddressNotDeliverableError` if any server unambiguously and permanently refuses to accept the recipient address. - If the server answers with a temporary error code, the validity of - the email address can not be determined. In that case, the function - returns `None`, or an `SMTPTemporaryError` is raised, dependent on - the value of `raise_temporary_errors`. Greylisting is a frequent - cause of this. 
+ Raise `SMTPTemporaryError` if the server answers with a temporary + error code when validity of the email address can not be + determined. Greylisting is a frequent cause of this. - If the SMTP server(s) reply with an error message to any of the - communication steps before the recipient address is checked, the - validity of the email address can not be determined either. In that - case, the function returns `None`, or an `SMTPCommunicationError` is - raised, dependent on the value of `raise_communication_errors`. + Raise `SMTPCommunicationError` if the SMTP server(s) reply with an + error message to any of the communication steps before the recipient + address is checked, and the validity of the email address can not be + determined either. In case no responsible SMTP servers can be determined, a variety of exceptions is raised depending on the exact issue, all derived from @@ -269,8 +259,6 @@ def mx_check( if skip_smtp: return True smtp_checker = _SMTPChecker( - local_hostname=helo_host, timeout=smtp_timeout, debug=debug, - raise_communication_errors=raise_communication_errors, - raise_temporary_errors=raise_temporary_errors, - sender=from_address.ace, recip=email_address.ace) - return smtp_checker.check(mx_records) + local_hostname=helo_host, timeout=smtp_timeout, debug=debug, + sender=from_address, recip=email_address) + return smtp_checker.check(hosts=mx_records) diff --git a/validate_email/validate_email.py b/validate_email/validate_email.py index ab08926..9165504 100644 --- a/validate_email/validate_email.py +++ b/validate_email/validate_email.py @@ -4,24 +4,36 @@ from typing import Optional from .domainlist_check import domainlist_check from .email_address import EmailAddress from .exceptions import ( - AddressFormatError, EmailValidationError, FromAddressFormatError) + AddressFormatError, EmailValidationError, FromAddressFormatError, + SMTPTemporaryError) from .mx_check import mx_check from .regex_check import regex_check LOGGER = 
getLogger(name=__name__) +__doc__ = """\ +Verify the given email address by determining the SMTP servers +responsible for the domain and then asking them to deliver an email to +the address. Before the actual message is sent, the process is +interrupted. + +PLEASE NOTE: Some email providers only tell the actual delivery failure +AFTER having delivered the body which this module doesn't, while others +simply accept everything and send a bounce notification later. Hence, a +100% proper response is not guaranteed. +""" + def validate_email_or_fail( email_address: str, check_regex: bool = True, check_mx: bool = True, from_address: Optional[str] = None, helo_host: Optional[str] = None, smtp_timeout: int = 10, dns_timeout: int = 10, use_blacklist: bool = True, debug: bool = False, - skip_smtp: bool = False, raise_communication_errors: bool = False, - raise_temporary_errors: bool = False) -> Optional[bool]: + skip_smtp: bool = False) -> Optional[bool]: """ - Return `True` if the email address validation is successful, `None` if the - validation result is ambigious, and raise an exception if the validation - fails. + Return `True` if the email address validation is successful, `None` + if the validation result is ambigious, and raise an exception if the + validation fails. 
""" email_address = EmailAddress(email_address) if from_address is not None: @@ -39,9 +51,7 @@ def validate_email_or_fail( return mx_check( email_address=email_address, from_address=from_address, helo_host=helo_host, smtp_timeout=smtp_timeout, - dns_timeout=dns_timeout, skip_smtp=skip_smtp, debug=debug, - raise_communication_errors=raise_communication_errors, - raise_temporary_errors=raise_temporary_errors) + dns_timeout=dns_timeout, skip_smtp=skip_smtp, debug=debug) def validate_email(email_address: str, *args, **kwargs): @@ -53,6 +63,11 @@ def validate_email(email_address: str, *args, **kwargs): """ try: return validate_email_or_fail(email_address, *args, **kwargs) + except SMTPTemporaryError as error: + message = f'Validation for {email_address!r} ambigious: {error}' + if kwargs.get('debug'): + LOGGER.warning(msg=message) + return except EmailValidationError as error: message = f'Validation for {email_address!r} failed: {error}' if kwargs.get('debug'): From 5a277113979d7567fae700a51ad5fce4a5017629 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20K=C3=A1rolyi?= Date: Mon, 1 Mar 2021 11:37:48 +0100 Subject: [PATCH 05/27] Overhauling response handling --- validate_email/exceptions.py | 22 ++++++++++++++++++- validate_email/mx_check.py | 36 +++++++++++++++++++------------- validate_email/validate_email.py | 1 - 3 files changed, 43 insertions(+), 16 deletions(-) diff --git a/validate_email/exceptions.py b/validate_email/exceptions.py index 9b110ac..9bdea18 100644 --- a/validate_email/exceptions.py +++ b/validate_email/exceptions.py @@ -86,9 +86,29 @@ class NoValidMXError(MXError): message = 'No valid MX record for domain found.' +class SMTPNonSuccessError(EmailValidationError): + 'Raised when a 4xx or 5xx response is received.' 
+ + def __init__(self, command: str, code: int, text: str): + self.command = command + self.code = code + self.text = text + + def __str__(self) -> str: + return ( + f'{self.message}: {self.code} {self.text} ' + '(in reply to {self.command})') + + @property + def smtp_message(self) -> SMTPMessage: + 'Return an `SMTPMessage` from this exception.' + return SMTPMessage( + command=self.command, code=self.code, text=self.text) + + class SMTPError(EmailValidationError): """ - Base class for exceptions raised from unsuccessful SMTP + Base class for exceptions raised in the end from unsuccessful SMTP communication. `error_messages` is a dictionary with a `SMTPMessage` per MX record, diff --git a/validate_email/mx_check.py b/validate_email/mx_check.py index 677fa8f..b61fe37 100644 --- a/validate_email/mx_check.py +++ b/validate_email/mx_check.py @@ -14,7 +14,8 @@ from .email_address import EmailAddress from .exceptions import ( AddressNotDeliverableError, DNSConfigurationError, DNSTimeoutError, DomainNotFoundError, NoMXError, NoNameserverError, NoValidMXError, - SMTPCommunicationError, SMTPMessage, SMTPTemporaryError) + SMTPCommunicationError, SMTPMessage, SMTPNonSuccessError, + SMTPTemporaryError) LOGGER = getLogger(name=__name__) @@ -106,7 +107,6 @@ class _SMTPChecker(SMTP): """ Like `smtplib.SMTP.connect`, but raise appropriate exceptions on connection failure or negative SMTP server response. - A code > 400 is an error here. """ self.__command = 'connect' # Used for error messages. self._host = host # Workaround: Missing in standard smtplib! 
@@ -116,7 +116,8 @@ class _SMTPChecker(SMTP): except OSError as error: raise SMTPServerDisconnected(str(error)) if code >= 400: - raise SMTPResponseException(code=code, msg=message) + raise SMTPNonSuccessError( + command=self.__command, code=code, text=message) return code, message def starttls(self, *args, **kwargs): @@ -141,7 +142,9 @@ class _SMTPChecker(SMTP): """ code, message = super().mail(sender=sender, options=options) if code >= 400: - raise SMTPResponseException(code, message) + raise SMTPNonSuccessError( + command=self.__command, code=code, + text=message.decode(errors='ignore')) return code, message def rcpt(self, recip: str, options: tuple = ()): @@ -157,9 +160,8 @@ class _SMTPChecker(SMTP): command='RCPT TO', code=code, text=message.decode(errors='ignore'))}) elif code >= 400: - # Temporary error on this host: collect message - self.__temporary_errors[self._host] = SMTPMessage( - command='RCPT TO', code=code, + raise SMTPNonSuccessError( + command=self.__command, code=code, text=message.decode(errors='ignore')) return code, message @@ -202,11 +204,17 @@ class _SMTPChecker(SMTP): command=self.__command, code=e.smtp_code, text=e.smtp_error.decode(errors='ignore')) return False + except SMTPNonSuccessError as e: + if e.code >= 500: + self.__communication_errors[self._host] = e.smtp_message + else: + self.__temporary_errors[self._host] = e.smtp_message + return False finally: self.quit() return code < 400 - def check(self, hosts: List[str]) -> Optional[bool]: + def check(self, hosts: List[str]) -> bool: """ Run the check for all given SMTP servers. On positive result, return `True`, else raise exceptions described in `mx_check`. 
@@ -218,18 +226,17 @@ class _SMTPChecker(SMTP): return True # Raise appropriate exceptions when necessary if self.__communication_errors: - raise SMTPCommunicationError(self.__communication_errors) + raise SMTPCommunicationError( + error_messages=self.__communication_errors) elif self.__temporary_errors: - raise SMTPTemporaryError(self.__temporary_errors) - # Can't verify whether or not email address exists, return None + raise SMTPTemporaryError(error_messages=self.__temporary_errors) def mx_check( email_address: EmailAddress, debug: bool, from_address: Optional[EmailAddress] = None, helo_host: Optional[str] = None, smtp_timeout: int = 10, - dns_timeout: int = 10, skip_smtp: bool = False, -) -> Optional[bool]: + dns_timeout: int = 10, skip_smtp: bool = False) -> bool: """ Returns `True` as soon as the any server accepts the recipient address. @@ -239,7 +246,8 @@ def mx_check( Raise `SMTPTemporaryError` if the server answers with a temporary error code when validity of the email address can not be - determined. Greylisting is a frequent cause of this. + determined. Greylisting or server delivery issues can be a cause for + this. 
Raise `SMTPCommunicationError` if the SMTP server(s) reply with an error message to any of the communication steps before the recipient diff --git a/validate_email/validate_email.py b/validate_email/validate_email.py index 9165504..0786fde 100644 --- a/validate_email/validate_email.py +++ b/validate_email/validate_email.py @@ -41,7 +41,6 @@ def validate_email_or_fail( from_address = EmailAddress(from_address) except AddressFormatError: raise FromAddressFormatError - if check_regex: regex_check(email_address) if use_blacklist: From d0301bb968a723f773aacd4be9860e3350aec3d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20K=C3=A1rolyi?= Date: Mon, 1 Mar 2021 11:56:09 +0100 Subject: [PATCH 06/27] Refactor to FAQ for issue template --- FAQ.md | 46 ++++++++++++++++++++++++++++++++++++++++++++++ README.rst | 25 +++---------------------- 2 files changed, 49 insertions(+), 22 deletions(-) create mode 100644 FAQ.md diff --git a/FAQ.md b/FAQ.md new file mode 100644 index 0000000..81a2f1c --- /dev/null +++ b/FAQ.md @@ -0,0 +1,46 @@ +# FAQ: + +## The module provides false positives: + +Some SMTP Servers (Yahoo's servers for example) are only rejecting +nonexistent emails after the end of `DATA` command has been provided in +the conversation with the server. This module only goes until the +`RCPT TO` and says it's valid if it doesn't get rejected there, since +the `DATA` part of the email is the email body itself. There's not much +one can do with it, you have to accept false positives in the case of +yahoo.com and some other providers. I'm not sure if rejecting emails +after the `DATA` command is a valid behavior based on the SMTP RFC, but +I wouldn't wonder if not. + +## Everything gets rejected: + +Check if you have port 25 access from your IP to the accepting server's +IP. Even if you do, the server might use RBL's (spamhaus.org lists, for +example), and your IP might get rejected because of being listed in one +of the used lists by the email server. 
Your best bet is to use this +module on another server that delivers emails, thus eliminating the +chance of being blacklisted. + +## I can't check thousands of emails! + +This module is a tool; every tool can become a weapon if not used +properly. In my case, I use this module to check email address validity +at registration time, so not thousands at once. Doing so might make you +(your IP) end up in one of the aforementioned blocklists, as providers +will detect you as a possible spammer. In short, I would advise against +your use case. + +## My email doesn't check out! + +Run this code with the module installed (use your parameters within), +and see the output: + +```python +python -c 'import logging, sys; logging.basicConfig(stream=sys.stderr, level=logging.DEBUG); from validate_email import validate_email; print(validate_email("your.email@address.com", check_mx=True, debug=True))' +``` + +If you still don't understand why your code doesn't work as expected by +looking at the logs, then (and only then) add an issue explaining +your problem with a REPRODUCIBLE example, and the output of your test +run. + diff --git a/README.rst b/README.rst index 923707e..09cd8c8 100644 --- a/README.rst +++ b/README.rst @@ -66,25 +66,6 @@ The update can be triggered manually:: :code:`callback`: An optional `Callable` (function/method) to be called when the update is done. -FAQ: -======== -The module provides false positives: ------------------------------------- -Some SMTP Servers (Yahoo's servers for example) are only rejecting nonexistent emails after the end of ``DATA`` command has been provided in the conversation with the server. This module only goes until the ``RCPT TO`` and says it's valid if it doesn't get rejected there, since the ``DATA`` part of the email is the email body itself. There's not much one can do with it, you have to accept false positives in the case of yahoo.com and some other providers. 
I'm not sure if rejecting emails after the ``DATA`` command is a valid behavior based on the SMTP RFC, but I wouldn't wonder if not. - -Everything gets rejected: -------------------------- -Check if you have port 25 access from your IP to the accepting server's IP. Even if you do, the server might use RBL's (spamhaus.org lists, for example), and your IP might get rejected because of being listed in one of the used lists by the email server. Your best bet is to use this module on another server that delivers emails, thus eliminating the chance of being blacklisted. - -I can't check thousands of emails! ----------------------------------- -This module is a tool; every tool can become a weapon if not used properly. In my case, I use this module to check email address validity at registration time, so not thousands at once. Doing so might make you (your IP) end up in one of the aforementioned blocklists, as providers will detect you as a possible spammer. In short, I would advise against your use case. - -My email doesn't check out! ---------------------------- -Run this code with the module installed (use your parameters within), and see the output:: - - python -c 'import logging, sys; logging.basicConfig(stream=sys.stderr, level=logging.DEBUG); from validate_email import validate_email; print(validate_email(\'your.email@address.com\', check_mx=True, debug=True))' - - -If you still don't understand why your code doesn't work as expected by looking at the the logs, then (and only then) add an issue explaining your problem with a REPRODUCIBLE example, and the output of your test run. +Read the FAQ_! +============================ +.. 
_FAQ: https://github.com/karolyi/py3-validate-email/blob/master/FAQ.md From 184a15a5310ade234b680f9f09fb9c224cce7009 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20K=C3=A1rolyi?= Date: Mon, 1 Mar 2021 10:53:30 +0000 Subject: [PATCH 07/27] Update issue templates --- .github/ISSUE_TEMPLATE/bug_report.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.md diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..3c8dd05 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,27 @@ +--- +name: Bug report +about: Create a report to help us improve +title: "[BUG]" +labels: '' +assignees: '' + +--- + +- [ ] I have read and understood the [FAQ](https://github.com/karolyi/py3-validate-email/blob/master/FAQ.md) + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Desktop (please complete the following information):** + - OS: [e.g. Linux, FreeBSD, Windows] + - Flavor and Version [e.g. Debian 22, FreeBSD 12.2] +- Your network environment (ISP provided home connection, or testing from an actual whitelisted server) + +**Additional context** +Add any other context about the problem here. 
From b258d2735f407179a49e73811c8ff9870ab9d3dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20K=C3=A1rolyi?= Date: Mon, 1 Mar 2021 11:58:48 +0100 Subject: [PATCH 08/27] Update bug template --- .github/ISSUE_TEMPLATE/bug_report.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 3c8dd05..8a09ca9 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -12,8 +12,8 @@ assignees: '' **Describe the bug** A clear and concise description of what the bug is. -**To Reproduce** -Steps to reproduce the behavior: +**My debug output** +Output from the debug run described in the FAQ: **Expected behavior** A clear and concise description of what you expected to happen. From 1999e1d433c661f8e3dcfd80f7d8a6fb1165b5e5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20K=C3=A1rolyi?= Date: Mon, 1 Mar 2021 17:15:58 +0100 Subject: [PATCH 09/27] Simplifying code --- validate_email/exceptions.py | 22 +--------------------- validate_email/mx_check.py | 24 ++++++++---------------- 2 files changed, 9 insertions(+), 37 deletions(-) diff --git a/validate_email/exceptions.py b/validate_email/exceptions.py index 9bdea18..5d942a7 100644 --- a/validate_email/exceptions.py +++ b/validate_email/exceptions.py @@ -86,26 +86,6 @@ class NoValidMXError(MXError): message = 'No valid MX record for domain found.' -class SMTPNonSuccessError(EmailValidationError): - 'Raised when a 4xx or 5xx response is received.' - - def __init__(self, command: str, code: int, text: str): - self.command = command - self.code = code - self.text = text - - def __str__(self) -> str: - return ( - f'{self.message}: {self.code} {self.text} ' - '(in reply to {self.command})') - - @property - def smtp_message(self) -> SMTPMessage: - 'Return an `SMTPMessage` from this exception.' 
- return SMTPMessage( - command=self.command, code=self.code, text=self.text) - - class SMTPError(EmailValidationError): """ Base class for exceptions raised in the end from unsuccessful SMTP @@ -122,7 +102,7 @@ class SMTPError(EmailValidationError): def __str__(self) -> str: return '\n'.join([self.message] + [ f'{host}: {message.code} {message.text} ' - f'(in reply to {message.command})' + f'(in reply to {message.command!r})' for host, message in self.error_messages.items() ]) diff --git a/validate_email/mx_check.py b/validate_email/mx_check.py index b61fe37..3c8fac1 100644 --- a/validate_email/mx_check.py +++ b/validate_email/mx_check.py @@ -14,8 +14,7 @@ from .email_address import EmailAddress from .exceptions import ( AddressNotDeliverableError, DNSConfigurationError, DNSTimeoutError, DomainNotFoundError, NoMXError, NoNameserverError, NoValidMXError, - SMTPCommunicationError, SMTPMessage, SMTPNonSuccessError, - SMTPTemporaryError) + SMTPCommunicationError, SMTPMessage, SMTPTemporaryError) LOGGER = getLogger(name=__name__) @@ -116,8 +115,7 @@ class _SMTPChecker(SMTP): except OSError as error: raise SMTPServerDisconnected(str(error)) if code >= 400: - raise SMTPNonSuccessError( - command=self.__command, code=code, text=message) + raise SMTPResponseException(code=code, msg=message) return code, message def starttls(self, *args, **kwargs): @@ -142,9 +140,7 @@ class _SMTPChecker(SMTP): """ code, message = super().mail(sender=sender, options=options) if code >= 400: - raise SMTPNonSuccessError( - command=self.__command, code=code, - text=message.decode(errors='ignore')) + raise SMTPResponseException(code=code, msg=message) return code, message def rcpt(self, recip: str, options: tuple = ()): @@ -160,9 +156,7 @@ class _SMTPChecker(SMTP): command='RCPT TO', code=code, text=message.decode(errors='ignore'))}) elif code >= 400: - raise SMTPNonSuccessError( - command=self.__command, code=code, - text=message.decode(errors='ignore')) + raise SMTPResponseException(code=code, 
msg=message) return code, message def quit(self): @@ -200,15 +194,13 @@ class _SMTPChecker(SMTP): command=self.__command, code=0, text=str(e)) return False except SMTPResponseException as e: - self.__communication_errors[self._host] = SMTPMessage( + smtp_message = SMTPMessage( command=self.__command, code=e.smtp_code, text=e.smtp_error.decode(errors='ignore')) - return False - except SMTPNonSuccessError as e: - if e.code >= 500: - self.__communication_errors[self._host] = e.smtp_message + if e.smtp_code >= 500: + self.__communication_errors[self._host] = smtp_message else: - self.__temporary_errors[self._host] = e.smtp_message + self.__temporary_errors[self._host] = smtp_message return False finally: self.quit() From b0e89cabd7f0e7aaf4aedcdf2caa152b678f19b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20K=C3=A1rolyi?= Date: Mon, 1 Mar 2021 17:37:29 +0100 Subject: [PATCH 10/27] Add changelog for 1.0.0 --- CHANGELOG.txt | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index e5e3cf3..5893548 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,3 +1,12 @@ +1.0.0: +- New major release with breaking changes! They are: + - Ambiguous results, and the possibility of more of them: + - The validate_email_or_fail() function will now raise an SMTPTemporaryError() on an ambiguous result. That is, greylisting or no servers providing a definitive negative or positive. + - A server that bails out with a 4xx code at any part of the SMTP conversation, will be marked as ambiguous, just like the way most implementations treat email delivery. + - The validate_email_or_fail() function will now raise an SMTPCommunicationError() on a denied email address but will keep trying other MX hosts for validation. If all other servers are ambiguous, the validation will fail nonetheless. + - Both of the aforementioned exceptions will contain the occurred communication results in their error_messages class variables. 
+- Props to @reinhard-mueller for coming up with the new proposal and helping in refining the idea. + 0.2.16: - Workaround for a bug in the built-in python 3.8 smtp library: https://github.com/karolyi/py3-validate-email/issues/50 @@ -104,4 +113,4 @@ - Handle 'No MX record' exception 0.1.3: -- Added ambigious (4xx) response code handling +- Added ambiguous (4xx) response code handling From 5e84348d9bcd7cd69dabeb024f0d072528612e13 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20K=C3=A1rolyi?= Date: Mon, 1 Mar 2021 17:43:52 +0100 Subject: [PATCH 11/27] Fix tests --- tests/test_mx_check.py | 3 ++- validate_email/validate_email.py | 8 ++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/tests/test_mx_check.py b/tests/test_mx_check.py index d4f5de5..ebf9119 100644 --- a/tests/test_mx_check.py +++ b/tests/test_mx_check.py @@ -66,9 +66,10 @@ class GetMxRecordsTestCase(TestCase): _get_cleaned_mx_records(domain='testdomain3', timeout=10) self.assertTupleEqual(exc.exception.args, ()) - @patch.object(target=mx_module, attribute='_check_mx_records') + @patch.object(target=mx_module._SMTPChecker, attribute='check') def test_skip_smtp_argument(self, check_mx_records_mock): 'Check correct work of `skip_smtp` argument.' self.assertTrue(mx_check( EmailAddress('test@mail.ru'), debug=False, skip_smtp=True)) self.assertEqual(check_mx_records_mock.call_count, 0) + diff --git a/validate_email/validate_email.py b/validate_email/validate_email.py index 0786fde..a3bf6d1 100644 --- a/validate_email/validate_email.py +++ b/validate_email/validate_email.py @@ -35,16 +35,16 @@ def validate_email_or_fail( if the validation result is ambigious, and raise an exception if the validation fails. 
""" - email_address = EmailAddress(email_address) + email_address = EmailAddress(address=email_address) if from_address is not None: try: - from_address = EmailAddress(from_address) + from_address = EmailAddress(address=from_address) except AddressFormatError: raise FromAddressFormatError if check_regex: - regex_check(email_address) + regex_check(address=email_address) if use_blacklist: - domainlist_check(email_address) + domainlist_check(address=email_address) if not check_mx: return True return mx_check( From 09e5e56d48a68bc4598903f627e9001e57a82297 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20K=C3=A1rolyi?= Date: Mon, 1 Mar 2021 18:09:43 +0100 Subject: [PATCH 12/27] Adding initial SMTPCheckerTest tests --- tests/test_mx_check.py | 42 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 39 insertions(+), 3 deletions(-) diff --git a/tests/test_mx_check.py b/tests/test_mx_check.py index ebf9119..c84f28d 100644 --- a/tests/test_mx_check.py +++ b/tests/test_mx_check.py @@ -1,3 +1,4 @@ +from smtplib import SMTP, SMTPResponseException from types import SimpleNamespace from unittest.case import TestCase from unittest.mock import Mock, patch @@ -6,8 +7,11 @@ from dns.exception import Timeout from validate_email import mx_check as mx_module from validate_email.email_address import EmailAddress -from validate_email.exceptions import DNSTimeoutError, NoValidMXError -from validate_email.mx_check import _get_cleaned_mx_records, mx_check +from validate_email.exceptions import ( + DNSTimeoutError, NoValidMXError, SMTPCommunicationError, SMTPMessage, + SMTPTemporaryError) +from validate_email.mx_check import ( + _get_cleaned_mx_records, _SMTPChecker, mx_check) class DnsNameStub(object): @@ -66,10 +70,42 @@ class GetMxRecordsTestCase(TestCase): _get_cleaned_mx_records(domain='testdomain3', timeout=10) self.assertTupleEqual(exc.exception.args, ()) - @patch.object(target=mx_module._SMTPChecker, attribute='check') + @patch.object(target=_SMTPChecker, 
attribute='check') def test_skip_smtp_argument(self, check_mx_records_mock): 'Check correct work of `skip_smtp` argument.' self.assertTrue(mx_check( EmailAddress('test@mail.ru'), debug=False, skip_smtp=True)) self.assertEqual(check_mx_records_mock.call_count, 0) + check_mx_records_mock.call_count + +class SMTPCheckerTest(TestCase): + 'Checking the `_SMTPChecker` class functions.' + + @patch.object(target=SMTP, attribute='connect') + def test_connect_raises_serverdisconnected(self, mock_connect): + 'Connect raises `SMTPServerDisconnected`.' + mock_connect.side_effect = OSError('test message') + checker = _SMTPChecker( + local_hostname='localhost', timeout=5, debug=False, + sender='test@example.com', recip='test@example.com') + with self.assertRaises(SMTPCommunicationError) as exc: + checker.check(hosts=['testhost']) + self.assertDictEqual(exc.exception.error_messages, { + 'testhost': SMTPMessage( + command='connect', code=0, text='test message') + }) + + @patch.object(target=SMTP, attribute='connect') + def test_connect_with_error(self, mock_connect): + 'Connect raises `SMTPServerDisconnected`.' 
+ checker = _SMTPChecker( + local_hostname='localhost', timeout=5, debug=False, + sender='test@example.com', recip='test@example.com') + mock_connect.return_value = (400, b'test delay message') + with self.assertRaises(SMTPTemporaryError) as exc: + checker.check(hosts=['testhost']) + self.assertDictEqual(exc.exception.error_messages, { + 'testhost': SMTPMessage( + command='connect', code=400, text='test delay message') + }) From 1d0207f7629ffd3b9bcd43807b66612f95753bfd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20K=C3=A1rolyi?= Date: Mon, 1 Mar 2021 18:15:02 +0100 Subject: [PATCH 13/27] Fix names --- tests/test_mx_check.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_mx_check.py b/tests/test_mx_check.py index c84f28d..63ce22a 100644 --- a/tests/test_mx_check.py +++ b/tests/test_mx_check.py @@ -80,7 +80,7 @@ class GetMxRecordsTestCase(TestCase): class SMTPCheckerTest(TestCase): - 'Checking the `_SMTPChecker` class functions.' + 'Checking the `_SMTPChecker` class methods.' @patch.object(target=SMTP, attribute='connect') def test_connect_raises_serverdisconnected(self, mock_connect): @@ -98,7 +98,7 @@ class SMTPCheckerTest(TestCase): @patch.object(target=SMTP, attribute='connect') def test_connect_with_error(self, mock_connect): - 'Connect raises `SMTPServerDisconnected`.' + 'Connect raises `SMTPTemporaryError`.' 
checker = _SMTPChecker( local_hostname='localhost', timeout=5, debug=False, sender='test@example.com', recip='test@example.com') From c6c3c18ebee3b8401c74afd1860395a29879db84 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20K=C3=A1rolyi?= Date: Mon, 1 Mar 2021 19:11:17 +0100 Subject: [PATCH 14/27] Fix unused import so travis won't chimp out --- tests/test_mx_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_mx_check.py b/tests/test_mx_check.py index 63ce22a..91c47c1 100644 --- a/tests/test_mx_check.py +++ b/tests/test_mx_check.py @@ -1,4 +1,4 @@ -from smtplib import SMTP, SMTPResponseException +from smtplib import SMTP from types import SimpleNamespace from unittest.case import TestCase from unittest.mock import Mock, patch From a0f1cd1b046cb76aef052246db5f4ce750c00f86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reinhard=20M=C3=BCller?= Date: Tue, 2 Mar 2021 17:43:03 +0100 Subject: [PATCH 15/27] Improve logging options Issue our own log messages independently of the "debug" parameter, so a user can activate our log messages through Python's standard logger config features, while still having disabled smtplib's debug messages, which are very verbose and always go to stderr. Change the log output for failed and ambiguous verifications from "warning" to "info" as this is a normal function of the library and not something that would require attention. Also this makes sure that no log output is generated if logging is not configured at all (default is to only display warning and above). --- validate_email/mx_check.py | 3 +-- validate_email/validate_email.py | 8 ++------ 2 files changed, 3 insertions(+), 8 deletions(-) diff --git a/validate_email/mx_check.py b/validate_email/mx_check.py index 3c8fac1..fb98d9f 100644 --- a/validate_email/mx_check.py +++ b/validate_email/mx_check.py @@ -212,8 +212,7 @@ class _SMTPChecker(SMTP): return `True`, else raise exceptions described in `mx_check`.
""" for host in hosts: - if self.debuglevel > 0: - LOGGER.debug(msg=f'Trying {host} ...') + LOGGER.debug(msg=f'Trying {host} ...') if self._check_one(host=host): return True # Raise appropriate exceptions when necessary diff --git a/validate_email/validate_email.py b/validate_email/validate_email.py index a3bf6d1..6d36560 100644 --- a/validate_email/validate_email.py +++ b/validate_email/validate_email.py @@ -63,12 +63,8 @@ def validate_email(email_address: str, *args, **kwargs): try: return validate_email_or_fail(email_address, *args, **kwargs) except SMTPTemporaryError as error: - message = f'Validation for {email_address!r} ambigious: {error}' - if kwargs.get('debug'): - LOGGER.warning(msg=message) + LOGGER.info(msg=f'Validation for {email_address!r} ambigious: {error}') return except EmailValidationError as error: - message = f'Validation for {email_address!r} failed: {error}' - if kwargs.get('debug'): - LOGGER.warning(msg=message) + LOGGER.info(msg=f'Validation for {email_address!r} failed: {error}') return False From bcbadbab651709f21ded1256d37d6c5fc4d2d2c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reinhard=20M=C3=BCller?= Date: Tue, 2 Mar 2021 18:30:13 +0100 Subject: [PATCH 16/27] Split mx_check into dns_check and smtp_check This will allow us to cleanly and consistently keep the four verification steps separate: format (regex) - blacklist - dns - smtp. 
--- tests/test_dns_check.py | 66 +++++++++++ tests/test_mx_check.py | 111 ------------------ tests/test_smtp_check.py | 39 ++++++ validate_email/dns_check.py | 65 ++++++++++ validate_email/{mx_check.py => smtp_check.py} | 82 ++----------- validate_email/validate_email.py | 15 ++- 6 files changed, 191 insertions(+), 187 deletions(-) create mode 100644 tests/test_dns_check.py delete mode 100644 tests/test_mx_check.py create mode 100644 tests/test_smtp_check.py create mode 100644 validate_email/dns_check.py rename validate_email/{mx_check.py => smtp_check.py} (73%) diff --git a/tests/test_dns_check.py b/tests/test_dns_check.py new file mode 100644 index 0000000..88e5247 --- /dev/null +++ b/tests/test_dns_check.py @@ -0,0 +1,66 @@ +from types import SimpleNamespace +from unittest.case import TestCase +from unittest.mock import Mock, patch + +from dns.exception import Timeout + +from validate_email import dns_check +from validate_email.exceptions import DNSTimeoutError, NoValidMXError +from validate_email.dns_check import _get_cleaned_mx_records + + +class DnsNameStub(object): + 'Stub for `dns.name.Name`.' + + def __init__(self, value: str): + self.value = value + + def to_text(self) -> str: + return self.value + + +TEST_QUERY = Mock() + + +class GetMxRecordsTestCase(TestCase): + 'Testing `_get_mx_records`.' + + @patch.object(target=dns_check, attribute='resolve', new=TEST_QUERY) + def test_fails_with_invalid_hostnames(self): + 'Fails when an MX hostname is "."' + TEST_QUERY.return_value = [ + SimpleNamespace(exchange=DnsNameStub(value='.'))] + with self.assertRaises(NoValidMXError) as exc: + _get_cleaned_mx_records(domain='testdomain1', timeout=10) + self.assertTupleEqual(exc.exception.args, ()) + + @patch.object(target=dns_check, attribute='resolve', new=TEST_QUERY) + def test_fails_with_null_hostnames(self): + 'Fails when an MX hostname is invalid.' 
+ TEST_QUERY.return_value = [ + SimpleNamespace(exchange=DnsNameStub(value='asdqwe'))] + with self.assertRaises(NoValidMXError) as exc: + _get_cleaned_mx_records(domain='testdomain2', timeout=10) + self.assertTupleEqual(exc.exception.args, ()) + + @patch.object(target=dns_check, attribute='resolve', new=TEST_QUERY) + def test_filters_out_invalid_hostnames(self): + 'Returns only the valid hostnames.' + TEST_QUERY.return_value = [ + SimpleNamespace(exchange=DnsNameStub(value='asdqwe.')), + SimpleNamespace(exchange=DnsNameStub(value='.')), + SimpleNamespace(exchange=DnsNameStub(value='valid.host.')), + # This is an intentional duplicate. + SimpleNamespace(exchange=DnsNameStub(value='valid.host.')), + SimpleNamespace(exchange=DnsNameStub(value='valid2.host.')), + ] + result = _get_cleaned_mx_records(domain='testdomain3', timeout=10) + self.assertListEqual(result, ['valid.host', 'valid2.host']) + + @patch.object(target=dns_check, attribute='resolve', new=TEST_QUERY) + def test_raises_exception_on_dns_timeout(self): + 'Raises exception on DNS timeout.' + TEST_QUERY.side_effect = Timeout() + with self.assertRaises(DNSTimeoutError) as exc: + _get_cleaned_mx_records(domain='testdomain3', timeout=10) + self.assertTupleEqual(exc.exception.args, ()) diff --git a/tests/test_mx_check.py b/tests/test_mx_check.py deleted file mode 100644 index 91c47c1..0000000 --- a/tests/test_mx_check.py +++ /dev/null @@ -1,111 +0,0 @@ -from smtplib import SMTP -from types import SimpleNamespace -from unittest.case import TestCase -from unittest.mock import Mock, patch - -from dns.exception import Timeout - -from validate_email import mx_check as mx_module -from validate_email.email_address import EmailAddress -from validate_email.exceptions import ( - DNSTimeoutError, NoValidMXError, SMTPCommunicationError, SMTPMessage, - SMTPTemporaryError) -from validate_email.mx_check import ( - _get_cleaned_mx_records, _SMTPChecker, mx_check) - - -class DnsNameStub(object): - 'Stub for `dns.name.Name`.' 
- - def __init__(self, value: str): - self.value = value - - def to_text(self) -> str: - return self.value - - -TEST_QUERY = Mock() - - -class GetMxRecordsTestCase(TestCase): - 'Testing `_get_mx_records`.' - - @patch.object(target=mx_module, attribute='resolve', new=TEST_QUERY) - def test_fails_with_invalid_hostnames(self): - 'Fails when an MX hostname is "."' - TEST_QUERY.return_value = [ - SimpleNamespace(exchange=DnsNameStub(value='.'))] - with self.assertRaises(NoValidMXError) as exc: - _get_cleaned_mx_records(domain='testdomain1', timeout=10) - self.assertTupleEqual(exc.exception.args, ()) - - @patch.object(target=mx_module, attribute='resolve', new=TEST_QUERY) - def test_fails_with_null_hostnames(self): - 'Fails when an MX hostname is invalid.' - TEST_QUERY.return_value = [ - SimpleNamespace(exchange=DnsNameStub(value='asdqwe'))] - with self.assertRaises(NoValidMXError) as exc: - _get_cleaned_mx_records(domain='testdomain2', timeout=10) - self.assertTupleEqual(exc.exception.args, ()) - - @patch.object(target=mx_module, attribute='resolve', new=TEST_QUERY) - def test_filters_out_invalid_hostnames(self): - 'Returns only the valid hostnames.' - TEST_QUERY.return_value = [ - SimpleNamespace(exchange=DnsNameStub(value='asdqwe.')), - SimpleNamespace(exchange=DnsNameStub(value='.')), - SimpleNamespace(exchange=DnsNameStub(value='valid.host.')), - # This is an intentional duplicate. - SimpleNamespace(exchange=DnsNameStub(value='valid.host.')), - SimpleNamespace(exchange=DnsNameStub(value='valid2.host.')), - ] - result = _get_cleaned_mx_records(domain='testdomain3', timeout=10) - self.assertListEqual(result, ['valid.host', 'valid2.host']) - - @patch.object(target=mx_module, attribute='resolve', new=TEST_QUERY) - def test_raises_exception_on_dns_timeout(self): - 'Raises exception on DNS timeout.' 
- TEST_QUERY.side_effect = Timeout() - with self.assertRaises(DNSTimeoutError) as exc: - _get_cleaned_mx_records(domain='testdomain3', timeout=10) - self.assertTupleEqual(exc.exception.args, ()) - - @patch.object(target=_SMTPChecker, attribute='check') - def test_skip_smtp_argument(self, check_mx_records_mock): - 'Check correct work of `skip_smtp` argument.' - self.assertTrue(mx_check( - EmailAddress('test@mail.ru'), debug=False, skip_smtp=True)) - self.assertEqual(check_mx_records_mock.call_count, 0) - check_mx_records_mock.call_count - - -class SMTPCheckerTest(TestCase): - 'Checking the `_SMTPChecker` class methods.' - - @patch.object(target=SMTP, attribute='connect') - def test_connect_raises_serverdisconnected(self, mock_connect): - 'Connect raises `SMTPServerDisconnected`.' - mock_connect.side_effect = OSError('test message') - checker = _SMTPChecker( - local_hostname='localhost', timeout=5, debug=False, - sender='test@example.com', recip='test@example.com') - with self.assertRaises(SMTPCommunicationError) as exc: - checker.check(hosts=['testhost']) - self.assertDictEqual(exc.exception.error_messages, { - 'testhost': SMTPMessage( - command='connect', code=0, text='test message') - }) - - @patch.object(target=SMTP, attribute='connect') - def test_connect_with_error(self, mock_connect): - 'Connect raises `SMTPTemporaryError`.' 
- checker = _SMTPChecker( - local_hostname='localhost', timeout=5, debug=False, - sender='test@example.com', recip='test@example.com') - mock_connect.return_value = (400, b'test delay message') - with self.assertRaises(SMTPTemporaryError) as exc: - checker.check(hosts=['testhost']) - self.assertDictEqual(exc.exception.error_messages, { - 'testhost': SMTPMessage( - command='connect', code=400, text='test delay message') - }) diff --git a/tests/test_smtp_check.py b/tests/test_smtp_check.py new file mode 100644 index 0000000..5eacf80 --- /dev/null +++ b/tests/test_smtp_check.py @@ -0,0 +1,39 @@ +from smtplib import SMTP +from unittest.case import TestCase +from unittest.mock import patch + +from validate_email.exceptions import ( + SMTPCommunicationError, SMTPMessage, SMTPTemporaryError) +from validate_email.smtp_check import _SMTPChecker + + +class SMTPCheckerTest(TestCase): + 'Checking the `_SMTPChecker` class methods.' + + @patch.object(target=SMTP, attribute='connect') + def test_connect_raises_serverdisconnected(self, mock_connect): + 'Connect raises `SMTPServerDisconnected`.' + mock_connect.side_effect = OSError('test message') + checker = _SMTPChecker( + local_hostname='localhost', timeout=5, debug=False, + sender='test@example.com', recip='test@example.com') + with self.assertRaises(SMTPCommunicationError) as exc: + checker.check(hosts=['testhost']) + self.assertDictEqual(exc.exception.error_messages, { + 'testhost': SMTPMessage( + command='connect', code=0, text='test message') + }) + + @patch.object(target=SMTP, attribute='connect') + def test_connect_with_error(self, mock_connect): + 'Connect raises `SMTPTemporaryError`.' 
+ checker = _SMTPChecker( + local_hostname='localhost', timeout=5, debug=False, + sender='test@example.com', recip='test@example.com') + mock_connect.return_value = (400, b'test delay message') + with self.assertRaises(SMTPTemporaryError) as exc: + checker.check(hosts=['testhost']) + self.assertDictEqual(exc.exception.error_messages, { + 'testhost': SMTPMessage( + command='connect', code=400, text='test delay message') + }) diff --git a/validate_email/dns_check.py b/validate_email/dns_check.py new file mode 100644 index 0000000..7fb5b10 --- /dev/null +++ b/validate_email/dns_check.py @@ -0,0 +1,65 @@ +from dns.exception import Timeout +from dns.rdatatype import MX as rdtype_mx +from dns.rdtypes.ANY.MX import MX +from dns.resolver import ( + NXDOMAIN, YXDOMAIN, Answer, NoAnswer, NoNameservers, resolve) + +from .constants import HOST_REGEX +from .email_address import EmailAddress +from .exceptions import ( + DNSConfigurationError, DNSTimeoutError, DomainNotFoundError, NoMXError, + NoNameserverError, NoValidMXError) + + +def _get_mx_records(domain: str, timeout: int) -> list: + 'Return the DNS response for checking, optionally raise exceptions.' + try: + return resolve( + qname=domain, rdtype=rdtype_mx, lifetime=timeout, + search=True) # type: Answer + except NXDOMAIN: + raise DomainNotFoundError + except NoNameservers: + raise NoNameserverError + except Timeout: + raise DNSTimeoutError + except YXDOMAIN: + raise DNSConfigurationError + except NoAnswer: + raise NoMXError + + +def _get_cleaned_mx_records(domain: str, timeout: int) -> list: + """ + Return a list of hostnames in the MX record, raise an exception on + any issues. 
+ """ + records = _get_mx_records(domain=domain, timeout=timeout) + to_check = list() + host_set = set() + for record in records: # type: MX + dns_str = record.exchange.to_text().rstrip('.') # type: str + if dns_str in host_set: + continue + to_check.append(dns_str) + host_set.add(dns_str) + result = [x for x in to_check if HOST_REGEX.search(string=x)] + if not result: + raise NoValidMXError + return result + + +def dns_check(email_address: EmailAddress, dns_timeout: int = 10) -> list: + """ + Check whether there are any responsible SMTP servers for the email + address by looking up the DNS MX records. + + In case no responsible SMTP servers can be determined, a variety of + exceptions is raised depending on the exact issue, all derived from + `MXError`. Otherwise, return the list of MX hostnames. + """ + if email_address.domain_literal_ip: + return [email_address.domain_literal_ip] + else: + return _get_cleaned_mx_records( + domain=email_address.domain, timeout=dns_timeout) diff --git a/validate_email/mx_check.py b/validate_email/smtp_check.py similarity index 73% rename from validate_email/mx_check.py rename to validate_email/smtp_check.py index fb98d9f..8ad0f67 100644 --- a/validate_email/mx_check.py +++ b/validate_email/smtp_check.py @@ -3,60 +3,14 @@ from smtplib import ( SMTP, SMTPNotSupportedError, SMTPResponseException, SMTPServerDisconnected) from typing import List, Optional, Tuple -from dns.exception import Timeout -from dns.rdatatype import MX as rdtype_mx -from dns.rdtypes.ANY.MX import MX -from dns.resolver import ( - NXDOMAIN, YXDOMAIN, Answer, NoAnswer, NoNameservers, resolve) - -from .constants import HOST_REGEX from .email_address import EmailAddress from .exceptions import ( - AddressNotDeliverableError, DNSConfigurationError, DNSTimeoutError, - DomainNotFoundError, NoMXError, NoNameserverError, NoValidMXError, - SMTPCommunicationError, SMTPMessage, SMTPTemporaryError) + AddressNotDeliverableError, SMTPCommunicationError, SMTPMessage, + 
SMTPTemporaryError) LOGGER = getLogger(name=__name__) -def _get_mx_records(domain: str, timeout: int) -> list: - 'Return the DNS response for checking, optionally raise exceptions.' - try: - return resolve( - qname=domain, rdtype=rdtype_mx, lifetime=timeout, - search=True) # type: Answer - except NXDOMAIN: - raise DomainNotFoundError - except NoNameservers: - raise NoNameserverError - except Timeout: - raise DNSTimeoutError - except YXDOMAIN: - raise DNSConfigurationError - except NoAnswer: - raise NoMXError - - -def _get_cleaned_mx_records(domain: str, timeout: int) -> list: - """ - Return a list of hostnames in the MX record, raise an exception on - any issues. - """ - records = _get_mx_records(domain=domain, timeout=timeout) - to_check = list() - host_set = set() - for record in records: # type: MX - dns_str = record.exchange.to_text().rstrip('.') # type: str - if dns_str in host_set: - continue - to_check.append(dns_str) - host_set.add(dns_str) - result = [x for x in to_check if HOST_REGEX.search(string=x)] - if not result: - raise NoValidMXError - return result - - class _SMTPChecker(SMTP): """ A specialized variant of `smtplib.SMTP` for checking the validity of @@ -209,7 +163,7 @@ class _SMTPChecker(SMTP): def check(self, hosts: List[str]) -> bool: """ Run the check for all given SMTP servers. On positive result, - return `True`, else raise exceptions described in `mx_check`. + return `True`, else raise exceptions described in `smtp_check`. 
""" for host in hosts: LOGGER.debug(msg=f'Trying {host} ...') @@ -223,41 +177,27 @@ class _SMTPChecker(SMTP): raise SMTPTemporaryError(error_messages=self.__temporary_errors) -def mx_check( - email_address: EmailAddress, debug: bool, +def smtp_check( + email_address: EmailAddress, mx_records: list, debug: bool, from_address: Optional[EmailAddress] = None, - helo_host: Optional[str] = None, smtp_timeout: int = 10, - dns_timeout: int = 10, skip_smtp: bool = False) -> bool: + helo_host: Optional[str] = None, smtp_timeout: int = 10) -> bool: """ - Returns `True` as soon as the any server accepts the recipient - address. + Returns `True` as soon as the any of the given server accepts the + recipient address. Raise an `AddressNotDeliverableError` if any server unambiguously and permanently refuses to accept the recipient address. Raise `SMTPTemporaryError` if the server answers with a temporary - error code when validity of the email address can not be - determined. Greylisting or server delivery issues can be a cause for - this. + error code when validity of the email address can not be determined. + Greylisting or server delivery issues can be a cause for this. Raise `SMTPCommunicationError` if the SMTP server(s) reply with an error message to any of the communication steps before the recipient address is checked, and the validity of the email address can not be determined either. - - In case no responsible SMTP servers can be determined, a variety of - exceptions is raised depending on the exact issue, all derived from - `MXError`. 
""" - from_address = from_address or email_address - if email_address.domain_literal_ip: - mx_records = [email_address.domain_literal_ip] - else: - mx_records = _get_cleaned_mx_records( - domain=email_address.domain, timeout=dns_timeout) - if skip_smtp: - return True smtp_checker = _SMTPChecker( local_hostname=helo_host, timeout=smtp_timeout, debug=debug, - sender=from_address, recip=email_address) + sender=from_address or email_address, recip=email_address) return smtp_checker.check(hosts=mx_records) diff --git a/validate_email/validate_email.py b/validate_email/validate_email.py index 6d36560..b21a9e1 100644 --- a/validate_email/validate_email.py +++ b/validate_email/validate_email.py @@ -1,13 +1,14 @@ from logging import getLogger from typing import Optional +from .dns_check import dns_check from .domainlist_check import domainlist_check from .email_address import EmailAddress from .exceptions import ( AddressFormatError, EmailValidationError, FromAddressFormatError, SMTPTemporaryError) -from .mx_check import mx_check from .regex_check import regex_check +from .smtp_check import smtp_check LOGGER = getLogger(name=__name__) @@ -47,10 +48,14 @@ def validate_email_or_fail( domainlist_check(address=email_address) if not check_mx: return True - return mx_check( - email_address=email_address, from_address=from_address, - helo_host=helo_host, smtp_timeout=smtp_timeout, - dns_timeout=dns_timeout, skip_smtp=skip_smtp, debug=debug) + mx_records = dns_check( + email_address=email_address, dns_timeout=dns_timeout) + if skip_smtp: + return True + return smtp_check( + email_address=email_address, mx_records=mx_records, + from_address=from_address, helo_host=helo_host, + smtp_timeout=smtp_timeout, debug=debug) def validate_email(email_address: str, *args, **kwargs): From e1684f7f72da73f6a4245d28daedeb294cf0e3de Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reinhard=20M=C3=BCller?= Date: Wed, 3 Mar 2021 21:53:25 +0100 Subject: [PATCH 17/27] Make isort happy --- 
tests/test_dns_check.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_dns_check.py b/tests/test_dns_check.py index 88e5247..aea30b4 100644 --- a/tests/test_dns_check.py +++ b/tests/test_dns_check.py @@ -5,8 +5,8 @@ from unittest.mock import Mock, patch from dns.exception import Timeout from validate_email import dns_check -from validate_email.exceptions import DNSTimeoutError, NoValidMXError from validate_email.dns_check import _get_cleaned_mx_records +from validate_email.exceptions import DNSTimeoutError, NoValidMXError class DnsNameStub(object): From 4103a3a1b32a09e8effe8a64dff6936711e4512f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20K=C3=A1rolyi?= Date: Sun, 7 Mar 2021 13:02:21 +0100 Subject: [PATCH 18/27] Adjust CHANGELOG.txt --- CHANGELOG.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 5893548..c889bf1 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -5,7 +5,9 @@ - A server that bails out with a 4xx code at any part of the SMTP conversation, will be marked as ambiguous, just like the way most implementations treat email delivery. - The validate_email_or_fail() function will now raise an SMTPCommunicationError() on a denied email address but will keep trying other MX hosts for validation. If all other servers are ambiguous, the validation will fail nonetheless. - Both of the aforementioned exceptions will contain the occurred communication results in their error_messages class variables. -- Props to @reinhard-mueller for coming up with the new proposal and helping in refining the idea. + - Internal API changes (refactorings) + - Check results are now logged with info level, instead of emitting warnings when debug is turned on. +- Props to @reinhard-mueller for coming up with the new proposals and helping in refining the idea. 
0.2.16: - Workaround for a bug in the built-in python 3.8 smtp library: https://github.com/karolyi/py3-validate-email/issues/50 From 1d5e7810aec1c37ddad84dfff1f21ccec549de85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20K=C3=A1rolyi?= Date: Sun, 7 Mar 2021 13:23:57 +0100 Subject: [PATCH 19/27] Cleaning up changelog --- CHANGELOG.txt | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index c889bf1..3532418 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,9 +1,11 @@ 1.0.0: - New major release with breaking changes! They are: - - Ambiguous results, and the possibility of more of them: + - Ambiguous results and the possibility of more of them, to reflect a real world SMTP delivery process: + - The module will keep trying probing through all MX hosts for validation and emit errors in the end of the full probing procedure. + - Any acceptance of the email delivery will be marked as valid, despite any other ambigious or negative result(s). + - The validate_email_or_fail() function will raise an SMTPCommunicationError() on a denied email address only in the end. - The validate_email_or_fail() function will now raise an SMTPTemporaryError() on an ambiguous result. That is, greylisting or no servers providing a definitive negative or positive. - - A server that bails out with a 4xx code at any part of the SMTP conversation, will be marked as ambiguous, just like the way most implementations treat email delivery. - - The validate_email_or_fail() function will now raise an SMTPCommunicationError() on a denied email address but will keep trying other MX hosts for validation. If all other servers are ambiguous, the validation will fail nonetheless. + - A server that bails out with a 4xx code at any part of the SMTP conversation, will be marked as ambiguous. - Both of the aforementioned exceptions will contain the occurred communication results in their error_messages class variables. 
- Internal API changes (refactorings) - Check results are now logged with info level, instead of emitting warnings when debug is turned on. From 1b9b0682cddd214673e758d8bf3a954afc30a468 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reinhard=20M=C3=BCller?= Date: Thu, 11 Mar 2021 16:09:09 +0100 Subject: [PATCH 20/27] Improve emulation of true SMTP process * Check the SMTP servers in order of priority instead of random order. * Handle SMTPServerDisconnected like a 451 status as recommended by RFC 5321. * Exit early by directly raising CommunicationError on the first 5xx SMTPResponseException. See also the discussion at https://github.com/karolyi/py3-validate-email/discussions/61 --- tests/test_dns_check.py | 36 ++++++++++++++++++++++++------------ tests/test_smtp_check.py | 9 ++++----- validate_email/dns_check.py | 8 ++++---- validate_email/smtp_check.py | 24 +++++++++++------------- 4 files changed, 43 insertions(+), 34 deletions(-) diff --git a/tests/test_dns_check.py b/tests/test_dns_check.py index aea30b4..d73e334 100644 --- a/tests/test_dns_check.py +++ b/tests/test_dns_check.py @@ -19,6 +19,21 @@ class DnsNameStub(object): return self.value +class DnsRRsetStub(object): + 'Stub for `dns.rrset.RRset`.' 
+ + def __init__(self, hostnames: list): + self.names = [ + SimpleNamespace(exchange=DnsNameStub(value=x)) for x in hostnames] + + def processing_order(self): + return self.names + + +def _answer(hostnames: list): + return SimpleNamespace(rrset=DnsRRsetStub(hostnames=hostnames)) + + TEST_QUERY = Mock() @@ -28,8 +43,7 @@ class GetMxRecordsTestCase(TestCase): @patch.object(target=dns_check, attribute='resolve', new=TEST_QUERY) def test_fails_with_invalid_hostnames(self): 'Fails when an MX hostname is "."' - TEST_QUERY.return_value = [ - SimpleNamespace(exchange=DnsNameStub(value='.'))] + TEST_QUERY.return_value = _answer(hostnames=['.']) with self.assertRaises(NoValidMXError) as exc: _get_cleaned_mx_records(domain='testdomain1', timeout=10) self.assertTupleEqual(exc.exception.args, ()) @@ -37,8 +51,7 @@ class GetMxRecordsTestCase(TestCase): @patch.object(target=dns_check, attribute='resolve', new=TEST_QUERY) def test_fails_with_null_hostnames(self): 'Fails when an MX hostname is invalid.' - TEST_QUERY.return_value = [ - SimpleNamespace(exchange=DnsNameStub(value='asdqwe'))] + TEST_QUERY.return_value = _answer(hostnames=['asdqwe']) with self.assertRaises(NoValidMXError) as exc: _get_cleaned_mx_records(domain='testdomain2', timeout=10) self.assertTupleEqual(exc.exception.args, ()) @@ -46,14 +59,13 @@ class GetMxRecordsTestCase(TestCase): @patch.object(target=dns_check, attribute='resolve', new=TEST_QUERY) def test_filters_out_invalid_hostnames(self): 'Returns only the valid hostnames.' - TEST_QUERY.return_value = [ - SimpleNamespace(exchange=DnsNameStub(value='asdqwe.')), - SimpleNamespace(exchange=DnsNameStub(value='.')), - SimpleNamespace(exchange=DnsNameStub(value='valid.host.')), - # This is an intentional duplicate. 
- SimpleNamespace(exchange=DnsNameStub(value='valid.host.')), - SimpleNamespace(exchange=DnsNameStub(value='valid2.host.')), - ] + TEST_QUERY.return_value = _answer(hostnames=[ + 'asdqwe.', + '.', + 'valid.host.', + 'valid.host.', # This is an intentional duplicate. + 'valid2.host.', + ]) result = _get_cleaned_mx_records(domain='testdomain3', timeout=10) self.assertListEqual(result, ['valid.host', 'valid2.host']) diff --git a/tests/test_smtp_check.py b/tests/test_smtp_check.py index 5eacf80..5266197 100644 --- a/tests/test_smtp_check.py +++ b/tests/test_smtp_check.py @@ -2,8 +2,7 @@ from smtplib import SMTP from unittest.case import TestCase from unittest.mock import patch -from validate_email.exceptions import ( - SMTPCommunicationError, SMTPMessage, SMTPTemporaryError) +from validate_email.exceptions import SMTPMessage, SMTPTemporaryError from validate_email.smtp_check import _SMTPChecker @@ -12,16 +11,16 @@ class SMTPCheckerTest(TestCase): @patch.object(target=SMTP, attribute='connect') def test_connect_raises_serverdisconnected(self, mock_connect): - 'Connect raises `SMTPServerDisconnected`.' + 'Connect raises `SMTPTemporaryError`.' 
mock_connect.side_effect = OSError('test message') checker = _SMTPChecker( local_hostname='localhost', timeout=5, debug=False, sender='test@example.com', recip='test@example.com') - with self.assertRaises(SMTPCommunicationError) as exc: + with self.assertRaises(SMTPTemporaryError) as exc: checker.check(hosts=['testhost']) self.assertDictEqual(exc.exception.error_messages, { 'testhost': SMTPMessage( - command='connect', code=0, text='test message') + command='connect', code=451, text='test message') }) @patch.object(target=SMTP, attribute='connect') diff --git a/validate_email/dns_check.py b/validate_email/dns_check.py index 7fb5b10..a45ae15 100644 --- a/validate_email/dns_check.py +++ b/validate_email/dns_check.py @@ -11,12 +11,12 @@ from .exceptions import ( NoNameserverError, NoValidMXError) -def _get_mx_records(domain: str, timeout: int) -> list: +def _get_mx_records(domain: str, timeout: int) -> Answer: 'Return the DNS response for checking, optionally raise exceptions.' try: return resolve( qname=domain, rdtype=rdtype_mx, lifetime=timeout, - search=True) # type: Answer + search=True) except NXDOMAIN: raise DomainNotFoundError except NoNameservers: @@ -34,10 +34,10 @@ def _get_cleaned_mx_records(domain: str, timeout: int) -> list: Return a list of hostnames in the MX record, raise an exception on any issues. 
""" - records = _get_mx_records(domain=domain, timeout=timeout) + answer = _get_mx_records(domain=domain, timeout=timeout) to_check = list() host_set = set() - for record in records: # type: MX + for record in answer.rrset.processing_order(): # type: MX dns_str = record.exchange.to_text().rstrip('.') # type: str if dns_str in host_set: continue diff --git a/validate_email/smtp_check.py b/validate_email/smtp_check.py index 8ad0f67..42b8ff9 100644 --- a/validate_email/smtp_check.py +++ b/validate_email/smtp_check.py @@ -38,7 +38,6 @@ class _SMTPChecker(SMTP): self.set_debuglevel(debuglevel=2 if debug else False) self.__sender = sender self.__recip = recip - self.__communication_errors = {} self.__temporary_errors = {} # Avoid error on close() after unsuccessful connect self.sock = None @@ -144,15 +143,16 @@ class _SMTPChecker(SMTP): self.mail(sender=self.__sender.ace) code, message = self.rcpt(recip=self.__recip.ace) except SMTPServerDisconnected as e: - self.__communication_errors[self._host] = SMTPMessage( - command=self.__command, code=0, text=str(e)) + self.__temporary_errors[self._host] = SMTPMessage( + command=self.__command, code=451, text=str(e)) return False except SMTPResponseException as e: smtp_message = SMTPMessage( command=self.__command, code=e.smtp_code, text=e.smtp_error.decode(errors='ignore')) if e.smtp_code >= 500: - self.__communication_errors[self._host] = smtp_message + raise SMTPCommunicationError( + error_messages={self._host: smtp_message}) else: self.__temporary_errors[self._host] = smtp_message return False @@ -169,11 +169,8 @@ class _SMTPChecker(SMTP): LOGGER.debug(msg=f'Trying {host} ...') if self._check_one(host=host): return True - # Raise appropriate exceptions when necessary - if self.__communication_errors: - raise SMTPCommunicationError( - error_messages=self.__communication_errors) - elif self.__temporary_errors: + # Raise exception for collected temporary errors + if self.__temporary_errors: raise 
SMTPTemporaryError(error_messages=self.__temporary_errors) @@ -188,11 +185,12 @@ def smtp_check( Raise an `AddressNotDeliverableError` if any server unambiguously and permanently refuses to accept the recipient address. - Raise `SMTPTemporaryError` if the server answers with a temporary - error code when validity of the email address can not be determined. - Greylisting or server delivery issues can be a cause for this. + Raise `SMTPTemporaryError` if all the servers answer with a + temporary error code during the SMTP communication. This means that + the validity of the email address can not be determined. Greylisting + or server delivery issues can be a cause for this. - Raise `SMTPCommunicationError` if the SMTP server(s) reply with an + Raise `SMTPCommunicationError` if any SMTP server replies with an error message to any of the communication steps before the recipient address is checked, and the validity of the email address can not be determined either. From da540d8db2f7a344b10864cc78e0a06200dbaa35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reinhard=20M=C3=BCller?= Date: Fri, 12 Mar 2021 01:06:13 +0100 Subject: [PATCH 21/27] Clean up parameter list, update docs See also discussion at https://github.com/karolyi/py3-validate-email/discussions/57 --- CHANGELOG.txt | 21 +++-- FAQ.md | 2 +- README.rst | 124 ++++++++++++++++++++++++++--- tests/test_blacklist_check.py | 16 ++-- validate_email/dns_check.py | 4 +- validate_email/domainlist_check.py | 6 +- validate_email/exceptions.py | 14 ++-- validate_email/regex_check.py | 10 +-- validate_email/smtp_check.py | 9 ++- validate_email/validate_email.py | 43 +++++----- 10 files changed, 182 insertions(+), 67 deletions(-) diff --git a/CHANGELOG.txt b/CHANGELOG.txt index 3532418..6889ada 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -1,12 +1,21 @@ 1.0.0: - New major release with breaking changes! 
They are: + - Parameter names for validate_email() and validate_email_or_fail() have changed: + - check_regex -> check_format + - use_blacklist -> check_blacklist + - check_mx -> check_dns + - skip_smtp -> check_smtp (with inverted logic) + - helo_host -> smtp_helo_host + - from_address -> smtp_from_address + - debug -> smtp_debug + - All parameters except for the first one (the email address to check) are now keyword-only. - Ambiguous results and the possibility of more of them, to reflect a real world SMTP delivery process: - - The module will keep trying probing through all MX hosts for validation and emit errors in the end of the full probing procedure. - - Any acceptance of the email delivery will be marked as valid, despite any other ambigious or negative result(s). - - The validate_email_or_fail() function will raise an SMTPCommunicationError() on a denied email address only in the end. - - The validate_email_or_fail() function will now raise an SMTPTemporaryError() on an ambiguous result. That is, greylisting or no servers providing a definitive negative or positive. - - A server that bails out with a 4xx code at any part of the SMTP conversation, will be marked as ambiguous. - - Both of the aforementioned exceptions will contain the occurred communication results in their error_messages class variables. + - The module tries all MX hosts in order of priority. + - An acceptance of the email address will yield a positive verification result, no further MX hosts will be tried. + - Any permanent SMTP error (5xx) will yield a negative verification result, no further MX hosts will be tried. + - Any temporary SMTP error (4xx) or any connection issue will cause the next MX host to be tried. Only if all MX hosts yield these kinds of errors, the overall verification result will be ambiguous. That is, greylisting or no servers providing a definitive negative or positive. 
+ - The validate_email_or_fail() function will now raise an SMTPTemporaryError() on an ambiguous result. + - All exceptions raised by the SMTP check will contain the occurred communication results in their error_messages class variables. - Internal API changes (refactorings) - Check results are now logged with info level, instead of emitting warnings when debug is turned on. - Props to @reinhard-mueller for coming up with the new proposals and helping in refining the idea. diff --git a/FAQ.md b/FAQ.md index 81a2f1c..85a728d 100644 --- a/FAQ.md +++ b/FAQ.md @@ -36,7 +36,7 @@ Run this code with the module installed (use your parameters within), and see the output: ```python -python -c 'import logging, sys; logging.basicConfig(stream=sys.stderr, level=logging.DEBUG); from validate_email import validate_email; print(validate_email(\'your.email@address.com\', check_mx=True, debug=True))' +python -c 'import logging, sys; logging.basicConfig(stream=sys.stderr, level=logging.DEBUG); from validate_email import validate_email; print(validate_email(\'your.email@address.com\', smtp_debug=True))' ``` If you still don't understand why your code doesn't work as expected by diff --git a/README.rst b/README.rst index 09cd8c8..5add316 100644 --- a/README.rst +++ b/README.rst @@ -25,32 +25,135 @@ USAGE Basic usage:: from validate_email import validate_email - is_valid = validate_email(email_address='example@example.com', check_regex=True, check_mx=True, from_address='my@from.addr.ess', helo_host='my.host.name', smtp_timeout=10, dns_timeout=10, use_blacklist=True, debug=False) + is_valid = validate_email(email_address='example@example.com', check_format=True, check_blacklist=True, check_dns=True, dns_timeout=10, check_smtp=True, smtp_timeout=10, smtp_helo_host='my.host.name', smtp_from_address='my@from.addr.ess', smtp_debug=False) -:code:`check_regex` will check will the email address has a valid structure and defaults to True +Parameters +---------------------------- 
-:code:`check_mx`: check the mx-records and check whether the email actually exists +:code:`email_address`: the email address to check -:code:`from_address`: the email address the probe will be sent from +:code:`check_format`: check whether the email address has a valid structure; defaults to :code:`True` -:code:`helo_host`: the host to use in SMTP HELO when checking for an email +:code:`check_blacklist`: check the email against the blacklist of domains downloaded from https://github.com/martenson/disposable-email-domains; defaults to :code:`True` -:code:`smtp_timeout`: seconds until SMTP timeout +:code:`check_dns`: check the DNS mx-records, defaults to :code:`True` + +:code:`dns_timeout`: seconds until DNS timeout; defaults to 10 seconds -:code:`dns_timeout`: seconds until DNS timeout +:code:`check_smtp`: check whether the email actually exists by initiating an SMTP conversation; defaults to :code:`True` -:code:`use_blacklist`: use the blacklist of domains downloaded from https://github.com/martenson/disposable-email-domains +:code:`smtp_timeout`: seconds until SMTP timeout; defaults to 10 seconds -:code:`debug`: emit debug/warning messages while checking email +:code:`smtp_helo_host`: the hostname to use in SMTP HELO/EHLO; if set to :code:`None` (the default), the fully qualified domain name of the local host is used -:code:`skip_smtp`: (default :code:`False`) skip the SMTP conversation with the server, after MX checks. Will automatically be set to :code:`True` when :code:`check_mx` is :code:`False`! 
+:code:`smtp_from_address`: the email address used for the sender in the SMTP conversation; if set to :code:`None` (the default), the :code:`email_address` parameter is used as the sender as well + +:code:`smtp_debug`: activate :code:`smtplib`'s debug output which always goes to stderr; defaults to :code:`False` + +Result +---------------------------- + +The function :code:`validate_email()` returns the following results: + +:code:`True` + All requested checks were successful for the given email address. + +:code:`False` + At least one of the requested checks failed for the given email address. + +:code:`None` + None of the requested checks failed, but at least one of them yielded an ambiguous result. Currently, the SMTP check is the only check which can actually yield an ambiguous result. + +Getting more information +---------------------------- The function :code:`validate_email_or_fail()` works exactly like :code:`validate_email`, except that it raises an exception in the case of validation failure and ambiguous result instead of returning :code:`False` or :code:`None`, respectively. +All these exceptions descend from :code:`EmailValidationError`. Please see below for the exact exceptions raised by the various checks. Note that all exception classes are defined in the module :code:`validate_email.exceptions`. + +Please note that :code:`SMTPTemporaryError` indicates an ambiguous check result rather than a check failure, so if you use :code:`validate_email_or_fail()`, you probably want to catch this exception. + +The checks +============================ + +By default, all checks are enabled, but each of them can be disabled by one of the :code:`check_...` parameters. Note that, however, :code:`check_smtp` implies :code:`check_dns`. + +:code:`check_format` +---------------------------- + +Check whether the given email address conforms to the general format requirements of valid email addresses. 
+ +:code:`validate_email_or_fail()` raises :code:`AddressFormatError` on any failure of this test. + +:code:`check_blacklist` +---------------------------- + +Check whether the domain part of the given email address (the part behind the "@") is known as a disposable and temporary email address domain. These are often used to register dummy users in order to spam or abuse some services. + +A list of such domains is maintained at https://github.com/martenson/disposable-email-domains, and this module uses that list. + +:code:`validate_email_or_fail()` raises :code:`DomainBlacklistedError` if the email address belongs to a blacklisted domain. + +:code:`check_dns` +---------------------------- + +Check whether there is a valid list of servers responsible for delivering emails to the given email address. + +First, a DNS query is issued for the email address' domain to retrieve a list of all MX records. That list is then stripped of duplicates and malformed entries. If at the end of this procedure, at least one valid MX record remains, the check is considered successful. + +On failure of this check, :code:`validate_email_or_fail()` raises one of the following exceptions, all of which descend from :code:`DNSError`: + +:code:`DomainNotFoundError` + The domain of the email address cannot be found at all. + +:code:`NoNameserverError` + There is no nameserver for the domain. + +:code:`DNSTimeoutError` + A timeout occurred when querying the nameserver. Note that the timeout period can be changed with the :code:`dns_timeout` parameter. + +:code:`DNSConfigurationError` + The nameserver is misconfigured. + +:code:`NoMXError` + The nameserver does not list any MX records for the domain. + +:code:`NoValidMXError` + The nameserver lists MX records for the domain, but none of them is valid. + +:code:`check_smtp` +---------------------------- + +Check whether the given email address exists by simulating an actual email delivery. 
+ +A connection to the SMTP server identified through the domain's MX record is established, and an SMTP conversation is initiated up to the point where the server confirms the existence of the email address. After that, instead of actually sending an email, the conversation is cancelled. + The module will try to negotiate a TLS connection with STARTTLS, and silently fall back to an unencrypted SMTP connection if the server doesn't support it. +If the SMTP server replies to the :code:`RCPT TO` command with a code 250 (success) response, the check is considered successful. + +If the SMTP server replies with a code 5xx (permanent error) response at any point in the conversation, the check is considered failed. + +If the SMTP server cannot be connected, unexpectedly closes the connection, or replies with a code 4xx (temporary error) at any stage of the conversation, the check is considered ambiguous. + +If there is more than one valid MX record for the domain, they are tried in order of priority until the first time the check is either successful or failed. Only in case of an ambiguous check result, the next server is tried, and only if the check result is ambiguous for all servers, the overall check is considered ambiguous as well. + +On failure of this check or on ambiguous result, :code:`validate_email_or_fail()` raises one of the following exceptions, all of which descend from :code:`SMTPError`: + +:code:`AddressNotDeliverableError` + The SMTP server permanently refused the email address. Technically, this means that the server replied to the :code:`RCPT TO` command with a code 5xx response. + +:code:`SMTPCommunicationError` + The SMTP server refused to even let us get to the point where we could ask it about the email address. Technically, this means that the server sent a code 5xx response either immediately after connection, or as a reply to the :code:`EHLO` (or :code:`HELO`) or :code:`MAIL FROM` commands. 
+ +:code:`SMTPTemporaryError` + A temporary error occurred during the check for all available MX servers. This is considered an ambiguous check result. For example, greylisting is a frequent cause for this. + +All of the above three exceptions provide further detail about the error response(s) in the exception's instance variable :code:`error_messages`. + Auto-updater ============================ + The package contains an auto-updater for downloading and updating the built-in blacklist.txt. It will run on each module load (and installation), but will try to update the content only if the file is older than 5 days, and if the content is not the same that's already downloaded. The update can be triggered manually:: @@ -68,4 +171,5 @@ The update can be triggered manually:: Read the FAQ_! ============================ + .. _FAQ: https://github.com/karolyi/py3-validate-email/blob/master/FAQ.md diff --git a/tests/test_blacklist_check.py b/tests/test_blacklist_check.py index 1405847..987b713 100644 --- a/tests/test_blacklist_check.py +++ b/tests/test_blacklist_check.py @@ -20,20 +20,20 @@ class BlacklistCheckTestCase(TestCase): domainlist_check(EmailAddress('pm2@mailinator.com')) with self.assertRaises(DomainBlacklistedError): validate_email_or_fail( - email_address='pm2@mailinator.com', check_regex=False, - use_blacklist=True) + email_address='pm2@mailinator.com', check_format=False, + check_blacklist=True) with self.assertRaises(DomainBlacklistedError): validate_email_or_fail( - email_address='pm2@mailinator.com', check_regex=True, - use_blacklist=True) + email_address='pm2@mailinator.com', check_format=True, + check_blacklist=True) with self.assertLogs(): self.assertFalse(expr=validate_email( - email_address='pm2@mailinator.com', check_regex=False, - use_blacklist=True, debug=True)) + email_address='pm2@mailinator.com', check_format=False, + check_blacklist=True)) with self.assertLogs(): self.assertFalse(expr=validate_email( - email_address='pm2@mailinator.com', 
check_regex=True, - use_blacklist=True, debug=True)) + email_address='pm2@mailinator.com', check_format=True, + check_blacklist=True)) def test_blacklist_negative(self): 'Allows a domain not in the blacklist.' diff --git a/validate_email/dns_check.py b/validate_email/dns_check.py index a45ae15..2170500 100644 --- a/validate_email/dns_check.py +++ b/validate_email/dns_check.py @@ -49,7 +49,7 @@ def _get_cleaned_mx_records(domain: str, timeout: int) -> list: return result -def dns_check(email_address: EmailAddress, dns_timeout: int = 10) -> list: +def dns_check(email_address: EmailAddress, timeout: int = 10) -> list: """ Check whether there are any responsible SMTP servers for the email address by looking up the DNS MX records. @@ -62,4 +62,4 @@ def dns_check(email_address: EmailAddress, dns_timeout: int = 10) -> list: return [email_address.domain_literal_ip] else: return _get_cleaned_mx_records( - domain=email_address.domain, timeout=dns_timeout) + domain=email_address.domain, timeout=timeout) diff --git a/validate_email/domainlist_check.py b/validate_email/domainlist_check.py index 1415cd2..1f590da 100644 --- a/validate_email/domainlist_check.py +++ b/validate_email/domainlist_check.py @@ -56,11 +56,11 @@ class DomainListValidator(object): self.domain_blacklist = set( x.strip().lower() for x in lines if x.strip()) - def __call__(self, address: EmailAddress) -> bool: + def __call__(self, email_address: EmailAddress) -> bool: 'Do the checking here.' - if address.domain in self.domain_whitelist: + if email_address.domain in self.domain_whitelist: return True - if address.domain in self.domain_blacklist: + if email_address.domain in self.domain_blacklist: raise DomainBlacklistedError return True diff --git a/validate_email/exceptions.py b/validate_email/exceptions.py index 5d942a7..370b3b5 100644 --- a/validate_email/exceptions.py +++ b/validate_email/exceptions.py @@ -44,41 +44,41 @@ class DomainBlacklistedError(EmailValidationError): message = 'Domain blacklisted.' 
-class MXError(EmailValidationError): +class DNSError(EmailValidationError): """ Base class of all exceptions that indicate failure to determine a valid MX for the domain of email address. """ -class DomainNotFoundError(MXError): +class DomainNotFoundError(DNSError): 'Raised when the domain is not found.' message = 'Domain not found.' -class NoNameserverError(MXError): +class NoNameserverError(DNSError): 'Raised when the domain does not resolve by nameservers in time.' message = 'No nameserver found for domain.' -class DNSTimeoutError(MXError): +class DNSTimeoutError(DNSError): 'Raised when the domain lookup times out.' message = 'Domain lookup timed out.' -class DNSConfigurationError(MXError): +class DNSConfigurationError(DNSError): """ Raised when the DNS entries for this domain are falsely configured. """ message = 'Misconfigurated DNS entries for domain.' -class NoMXError(MXError): +class NoMXError(DNSError): 'Raised when the domain has no MX records configured.' message = 'No MX record for domain found.' -class NoValidMXError(MXError): +class NoValidMXError(DNSError): """ Raised when the domain has MX records configured, but none of them has a valid format. diff --git a/validate_email/regex_check.py b/validate_email/regex_check.py index b12d6fb..321627d 100644 --- a/validate_email/regex_check.py +++ b/validate_email/regex_check.py @@ -28,22 +28,22 @@ def _validate_ipv46_address(value: str) -> bool: return _validate_ipv4_address(value) or _validate_ipv6_address(value) -def regex_check(address: EmailAddress) -> bool: +def regex_check(email_address: EmailAddress) -> bool: 'Slightly adjusted email regex checker from the Django project.' # Validate user part. - if not USER_REGEX.match(address.user): + if not USER_REGEX.match(email_address.user): raise AddressFormatError # Validate domain part. 
- if address.domain_literal_ip: - literal_match = LITERAL_REGEX.match(address.ace_domain) + if email_address.domain_literal_ip: + literal_match = LITERAL_REGEX.match(email_address.ace_domain) if literal_match is None: raise AddressFormatError if not _validate_ipv46_address(literal_match[1]): raise AddressFormatError else: - if HOST_REGEX.match(address.ace_domain) is None: + if HOST_REGEX.match(email_address.ace_domain) is None: raise AddressFormatError # All validations successful. diff --git a/validate_email/smtp_check.py b/validate_email/smtp_check.py index 42b8ff9..63473f7 100644 --- a/validate_email/smtp_check.py +++ b/validate_email/smtp_check.py @@ -175,9 +175,10 @@ class _SMTPChecker(SMTP): def smtp_check( - email_address: EmailAddress, mx_records: list, debug: bool, - from_address: Optional[EmailAddress] = None, - helo_host: Optional[str] = None, smtp_timeout: int = 10) -> bool: + email_address: EmailAddress, mx_records: List[str], + timeout: float = 10, helo_host: Optional[str] = None, + from_address: Optional[EmailAddress] = None, debug: bool = False + ) -> bool: """ Returns `True` as soon as the any of the given server accepts the recipient address. @@ -196,6 +197,6 @@ def smtp_check( determined either. 
""" smtp_checker = _SMTPChecker( - local_hostname=helo_host, timeout=smtp_timeout, debug=debug, + local_hostname=helo_host, timeout=timeout, debug=debug, sender=from_address or email_address, recip=email_address) return smtp_checker.check(hosts=mx_records) diff --git a/validate_email/validate_email.py b/validate_email/validate_email.py index b21a9e1..2f62b93 100644 --- a/validate_email/validate_email.py +++ b/validate_email/validate_email.py @@ -12,6 +12,7 @@ from .smtp_check import smtp_check LOGGER = getLogger(name=__name__) +__all__ = ['validate_email', 'validate_email_or_fail'] __doc__ = """\ Verify the given email address by determining the SMTP servers responsible for the domain and then asking them to deliver an email to @@ -26,39 +27,39 @@ simply accept everything and send a bounce notification later. Hence, a def validate_email_or_fail( - email_address: str, check_regex: bool = True, check_mx: bool = True, - from_address: Optional[str] = None, helo_host: Optional[str] = None, - smtp_timeout: int = 10, dns_timeout: int = 10, - use_blacklist: bool = True, debug: bool = False, - skip_smtp: bool = False) -> Optional[bool]: + email_address: str, *, check_format: bool = True, + check_blacklist: bool = True, check_dns: bool = True, + dns_timeout: float = 10, check_smtp: bool = True, + smtp_timeout: float = 10, smtp_helo_host: Optional[str] = None, + smtp_from_address: Optional[str] = None, smtp_debug: bool = False + ) -> Optional[bool]: """ Return `True` if the email address validation is successful, `None` if the validation result is ambigious, and raise an exception if the validation fails. """ email_address = EmailAddress(address=email_address) - if from_address is not None: + if check_format: + regex_check(email_address=email_address) + if check_blacklist: + domainlist_check(email_address=email_address) + if not (check_dns or check_smtp): # check_smtp implies check_dns. 
+ return True + mx_records = dns_check(email_address=email_address, timeout=dns_timeout) + if not check_smtp: + return True + if smtp_from_address is not None: try: - from_address = EmailAddress(address=from_address) + smtp_from_address = EmailAddress(address=smtp_from_address) except AddressFormatError: raise FromAddressFormatError - if check_regex: - regex_check(address=email_address) - if use_blacklist: - domainlist_check(address=email_address) - if not check_mx: - return True - mx_records = dns_check( - email_address=email_address, dns_timeout=dns_timeout) - if skip_smtp: - return True return smtp_check( email_address=email_address, mx_records=mx_records, - from_address=from_address, helo_host=helo_host, - smtp_timeout=smtp_timeout, debug=debug) + timeout=smtp_timeout, helo_host=smtp_helo_host, + from_address=smtp_from_address, debug=smtp_debug) -def validate_email(email_address: str, *args, **kwargs): +def validate_email(email_address: str, **kwargs): """ Return `True` or `False` depending if the email address exists or/and can be delivered. @@ -66,7 +67,7 @@ def validate_email(email_address: str, *args, **kwargs): Return `None` if the result is ambigious. 
""" try: - return validate_email_or_fail(email_address, *args, **kwargs) + return validate_email_or_fail(email_address, **kwargs) except SMTPTemporaryError as error: LOGGER.info(msg=f'Validation for {email_address!r} ambigious: {error}') return From 7729037830a945ec0eaa91c80e1716daa2cd60ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reinhard=20M=C3=BCller?= Date: Fri, 12 Mar 2021 14:38:44 +0100 Subject: [PATCH 22/27] Add extensive unit tests for smtp_check module --- tests/test_smtp_check.py | 124 +++++++++++++++++++++++++++++---------- 1 file changed, 93 insertions(+), 31 deletions(-) diff --git a/tests/test_smtp_check.py b/tests/test_smtp_check.py index 5266197..26b3747 100644 --- a/tests/test_smtp_check.py +++ b/tests/test_smtp_check.py @@ -1,38 +1,100 @@ -from smtplib import SMTP +from smtplib import SMTPServerDisconnected +from socket import timeout from unittest.case import TestCase from unittest.mock import patch -from validate_email.exceptions import SMTPMessage, SMTPTemporaryError -from validate_email.smtp_check import _SMTPChecker +from validate_email.email_address import EmailAddress +from validate_email.exceptions import ( + AddressNotDeliverableError, SMTPCommunicationError, SMTPTemporaryError) +from validate_email.smtp_check import _SMTPChecker, smtp_check -class SMTPCheckerTest(TestCase): - 'Checking the `_SMTPChecker` class methods.' +class SMTPMock(_SMTPChecker): + """ + Mock replacement for the SMTP connection. - @patch.object(target=SMTP, attribute='connect') - def test_connect_raises_serverdisconnected(self, mock_connect): - 'Connect raises `SMTPTemporaryError`.' 
- mock_connect.side_effect = OSError('test message') - checker = _SMTPChecker( - local_hostname='localhost', timeout=5, debug=False, - sender='test@example.com', recip='test@example.com') - with self.assertRaises(SMTPTemporaryError) as exc: - checker.check(hosts=['testhost']) - self.assertDictEqual(exc.exception.error_messages, { - 'testhost': SMTPMessage( - command='connect', code=451, text='test message') - }) + Instead of really communicating with an SMTP server, this class + works with predefined fake responses. By default, the responses + emulate a successful SMTP conversation, but it can be turned into an + unsuccessful one by patching the `reply` dictionary. + """ + reply = { + None: (220, b'Welcome'), + "EHLO": (502, b'Please use HELO'), + 'HELO': (220, b'HELO successful'), + 'MAIL': (250, b'MAIL FROM successful'), + 'RCPT': (250, b'RCPT TO successful'), + 'QUIT': (221, b'QUIT successful'), + } - @patch.object(target=SMTP, attribute='connect') - def test_connect_with_error(self, mock_connect): - 'Connect raises `SMTPTemporaryError`.' - checker = _SMTPChecker( - local_hostname='localhost', timeout=5, debug=False, - sender='test@example.com', recip='test@example.com') - mock_connect.return_value = (400, b'test delay message') - with self.assertRaises(SMTPTemporaryError) as exc: - checker.check(hosts=['testhost']) - self.assertDictEqual(exc.exception.error_messages, { - 'testhost': SMTPMessage( - command='connect', code=400, text='test delay message') - }) + last_command = None + + def _get_socket(self, host, port, timeout): + return None + + def send(self, s): + self.last_command = s[:4].upper() + + def getreply(self): + if isinstance(self.reply[self.last_command], Exception): + self.close() + raise self.reply[self.last_command] + return self.reply[self.last_command] + + +class SMTPCheckTest(TestCase): + 'Collection of tests the `smtp_check` method.' 
+ + # All the possible ways to fail we want to test, listed as tuples + # containing (command, reply, expected exception). + failures = [ + # Timeout on connection + (None, timeout(), SMTPTemporaryError), + # Connection unexpectedly closed during any stage + (None, SMTPServerDisconnected('Test'), SMTPTemporaryError), + ('EHLO', SMTPServerDisconnected('Test'), SMTPTemporaryError), + ('HELO', SMTPServerDisconnected('Test'), SMTPTemporaryError), + ('MAIL', SMTPServerDisconnected('Test'), SMTPTemporaryError), + ('RCPT', SMTPServerDisconnected('Test'), SMTPTemporaryError), + # Temporary error codes + (None, (421, b'Connect failed'), SMTPTemporaryError), + ('HELO', (421, b'HELO failed'), SMTPTemporaryError), + ('MAIL', (451, b'MAIL FROM failed'), SMTPTemporaryError), + ('RCPT', (451, b'RCPT TO failed'), SMTPTemporaryError), + # Permanent error codes + (None, (554, b'Connect failed'), SMTPCommunicationError), + ('HELO', (504, b'HELO failed'), SMTPCommunicationError), + ('MAIL', (550, b'MAIL FROM failed'), SMTPCommunicationError), + ('RCPT', (550, b'RCPT TO failed'), AddressNotDeliverableError), + ] + + @patch(target='validate_email.smtp_check._SMTPChecker', new=SMTPMock) + def test_smtp_success(self): + 'Succeeds on successful SMTP conversation' + self.assertTrue( + smtp_check( + email_address=EmailAddress('alice@example.com'), + mx_records=['smtp.example.com'], + ) + ) + + def _test_one_smtp_failure(self, cmd, reply, exception): + with patch.dict(in_dict=SMTPMock.reply, values={cmd: reply}): + with self.assertRaises(exception) as context: + smtp_check( + email_address=EmailAddress('alice@example.com'), + mx_records=['smtp.example.com'], + ) + if isinstance(reply, tuple): + error_messages = context.exception.error_messages + error_info = error_messages['smtp.example.com'] + self.assertEqual(error_info.command[:4].upper(), cmd or 'CONN') + self.assertEqual(error_info.code, reply[0]) + self.assertEqual(error_info.text, reply[1].decode()) + + 
@patch(target='validate_email.smtp_check._SMTPChecker', new=SMTPMock) + def test_smtp_failure(self): + 'Fails on unsuccessful SMTP conversation.' + for cmd, reply, exception in self.failures: + with self.subTest(cmd=cmd, reply=reply): + self._test_one_smtp_failure(cmd, reply, exception) From 1cdd99b3f1079c306199b7fad0a4d78a036a5e3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reinhard=20M=C3=BCller?= Date: Sat, 13 Mar 2021 11:04:51 +0100 Subject: [PATCH 23/27] Add some more (hopefully helpful) remarks to the FAQ --- FAQ.md | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/FAQ.md b/FAQ.md index 85a728d..25a7617 100644 --- a/FAQ.md +++ b/FAQ.md @@ -2,15 +2,23 @@ ## The module provides false positives: -Some SMTP Servers (Yahoo's servers for example) are only rejecting +The function of this module, and specifically of the SMTP check, relies +on the assumption that the mail server declared responsible for an email +domain will immediately reject any nonexistent address. + +Some SMTP servers (Yahoo's servers for example) are only rejecting nonexistent emails after the end of `DATA` command has been provided in the conversation with the server. This module only goes until the `RCPT TO` and says it's valid if it doesn't get rejected there, since -the `DATA` part of the email is the email body itself. There's not much -one can do with it, you have to accept false positives in the case of -yahoo.com and some other providers. I'm not sure if rejecting emails -after the `DATA` command is a valid behavior based on the SMTP RFC, but -I wouldn't wonder if not. +the `DATA` part of the email is the email body itself. + +Other SMTP servers accept emails even for nonexistent recipient +addresses and forward them to a different server which will create a +bounce message in a second step. This is the case for many email domains +hosted at Microsoft. 
+ +In both cases, there's nothing we can do about it, as the mail server +we talk to seemingly accepts the email address. ## Everything gets rejected: @@ -44,3 +52,19 @@ looking at the the logs, then (and only then) add an issue explaining your problem with a REPRODUCIBLE example, and the output of your test run. +## How can I pass my email account's credentials? How can I use port 465 or 587 when my provider blocks port 25? + +The credentials you got from your email provider, as well as the +instruction to use port 465 or 587, refers to *your provider's* server +for *outgoing* emails. + +This module, however, directly talks to the *recipient's* server for +*incoming* emails, so neither your credentials nor the switch to port +465 or 587 is of any use here. + +If your internet connection is within a dynamic range (often the case +for private use) or it doesn't have a proper reverse DNS entry, the +servers for many email domains will reject connections from you. This +can *not* be solved by using your provider's mail server. Instead, you +have to use the library on a machine with an internet connection with +static IP address and a proper reverse DNS entry. From cd47b2ef665e0d12dd9157075755f91dd69f9236 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reinhard=20M=C3=BCller?= Date: Sat, 13 Mar 2021 11:06:23 +0100 Subject: [PATCH 24/27] Fix typo --- FAQ.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FAQ.md b/FAQ.md index 25a7617..9d1bd18 100644 --- a/FAQ.md +++ b/FAQ.md @@ -55,7 +55,7 @@ run. ## How can I pass my email account's credentials? How can I use port 465 or 587 when my provider blocks port 25? The credentials you got from your email provider, as well as the -instruction to use port 465 or 587, refers to *your provider's* server +instruction to use port 465 or 587, refer to *your provider's* server for *outgoing* emails. 
This module, however, directly talks to the *recipient's* server for From 8641dfe0918988ec8853e9b63287de90d6ad2dda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20K=C3=A1rolyi?= Date: Sat, 13 Mar 2021 17:18:49 +0100 Subject: [PATCH 25/27] Adjust bugreport template --- .github/ISSUE_TEMPLATE/bug_report.md | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 8a09ca9..ae28223 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -10,18 +10,23 @@ assignees: '' - [ ] I have read and understood the [FAQ](https://github.com/karolyi/py3-validate-email/blob/master/FAQ.md) **Describe the bug** + A clear and concise description of what the bug is. **My debug output** + Output from the debug run described in the FAQ: **Expected behavior** + A clear and concise description of what you expected to happen. -**Desktop (please complete the following information):** +**Please complete the following information:** - OS: [e.g. Linux, FreeBSD, Windows] - Flavor and Version [e.g. Debian 22, FreeBSD 12.2] -- Your network environment (ISP provided home connecton, or testing from an actual whitelisted server) + - Your network environment (ISP provided home connection, or testing from an actual whitelisted server) + - Your exact `py3-validate-email` module version **Additional context** + Add any other context about the problem here. 
From dbd0c1285d7261898d9da9e9e4baa51ebd8f246e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Reinhard=20M=C3=BCller?= Date: Sun, 14 Mar 2021 13:24:24 +0100 Subject: [PATCH 26/27] Small formatting and doc tweaks --- FAQ.md | 13 +++++++------ validate_email/smtp_check.py | 8 ++++---- validate_email/validate_email.py | 14 +++++++------- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/FAQ.md b/FAQ.md index 9d1bd18..e0c6d20 100644 --- a/FAQ.md +++ b/FAQ.md @@ -62,9 +62,10 @@ This module, however, directly talks to the *recipient's* server for *incoming* emails, so neither your credentials nor the switch to port 465 or 587 is of any use here. -If your internet connection is within a dynamic range (often the case -for private use) or it doesn't have a proper reverse DNS entry, the -servers for many email domains will reject connections from you. This -can *not* be solved by using your provider's mail server. Instead, you -have to use the library on a machine with an internet connection with -static IP address and a proper reverse DNS entry. +If your internet connection is within an IP pool (often the case for +private use) or it doesn't have a proper reverse DNS entry, the servers +for many email domains (depending on their configuration) will reject +connections from you. This can *not* be solved by using your provider's +mail server. Instead, you have to use the library on a machine with an +internet connection with a static IP address and a proper reverse DNS +entry. 
diff --git a/validate_email/smtp_check.py b/validate_email/smtp_check.py index 63473f7..8edda3a 100644 --- a/validate_email/smtp_check.py +++ b/validate_email/smtp_check.py @@ -175,10 +175,10 @@ class _SMTPChecker(SMTP): def smtp_check( - email_address: EmailAddress, mx_records: List[str], - timeout: float = 10, helo_host: Optional[str] = None, - from_address: Optional[EmailAddress] = None, debug: bool = False - ) -> bool: + email_address: EmailAddress, mx_records: List[str], timeout: float = 10, + helo_host: Optional[str] = None, + from_address: Optional[EmailAddress] = None, debug: bool = False +) -> bool: """ Returns `True` as soon as the any of the given server accepts the recipient address. diff --git a/validate_email/validate_email.py b/validate_email/validate_email.py index 2f62b93..d77b272 100644 --- a/validate_email/validate_email.py +++ b/validate_email/validate_email.py @@ -27,12 +27,12 @@ simply accept everything and send a bounce notification later. Hence, a def validate_email_or_fail( - email_address: str, *, check_format: bool = True, - check_blacklist: bool = True, check_dns: bool = True, - dns_timeout: float = 10, check_smtp: bool = True, - smtp_timeout: float = 10, smtp_helo_host: Optional[str] = None, - smtp_from_address: Optional[str] = None, smtp_debug: bool = False - ) -> Optional[bool]: + email_address: str, *, check_format: bool = True, + check_blacklist: bool = True, check_dns: bool = True, + dns_timeout: float = 10, check_smtp: bool = True, + smtp_timeout: float = 10, smtp_helo_host: Optional[str] = None, + smtp_from_address: Optional[str] = None, smtp_debug: bool = False +) -> Optional[bool]: """ Return `True` if the email address validation is successful, `None` if the validation result is ambigious, and raise an exception if the @@ -43,7 +43,7 @@ def validate_email_or_fail( regex_check(email_address=email_address) if check_blacklist: domainlist_check(email_address=email_address) - if not (check_dns or check_smtp): # check_smtp implies 
check_dns. + if not check_dns and not check_smtp: # check_smtp implies check_dns. return True mx_records = dns_check(email_address=email_address, timeout=dns_timeout) if not check_smtp: From 92194c93d538600fe5e1c2f46b66a223e8423a40 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?L=C3=A1szl=C3=B3=20K=C3=A1rolyi?= Date: Sun, 14 Mar 2021 14:06:46 +0100 Subject: [PATCH 27/27] Bump idna dependency --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 36856be..e7c53ea 100644 --- a/setup.py +++ b/setup.py @@ -58,7 +58,7 @@ setup( name='py3-validate-email', version='0.2.16', packages=find_packages(exclude=['tests']), - install_requires=['dnspython~=2.0', 'idna~=2.10', 'filelock~=3.0'], + install_requires=['dnspython~=2.0', 'idna~=3.0', 'filelock~=3.0'], author='László Károlyi', author_email='laszlo@karolyi.hu', description=(