From 870b48dfcd889f10ced71aca84308d72b1d6441a Mon Sep 17 00:00:00 2001 From: AJ Slater Date: Thu, 10 Feb 2022 11:44:21 -0800 Subject: [PATCH] Management Command Database Locking --- .github/workflows/test.yml | 5 ++-- CHANGELOG.rst | 2 ++ README.rst | 2 ++ requirements.txt | 1 + setup.py | 1 + .../tests/test_management_commands.py | 19 ++++++++++++ xapian_backend.py | 30 +++++++++++++++++++ 7 files changed, 57 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 4403685..695fe9b 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -44,15 +44,14 @@ jobs: python-version: ['3.7', '3.8', '3.9', '3.10'] django-version: ['2.2', '3.2', '4.0'] xapian-version: ['1.4.18'] + filelock-version: ['3.4.2'] exclude: # Django added python 3.10 support in 3.2.9 - python-version: '3.10' django-version: '2.2' - xapian-version: '1.4.18' # Django dropped python 3.7 support in 4.0 - python-version: '3.7' django-version: '4.0' - xapian-version: '1.4.18' steps: - name: Set up Python ${{ matrix.python-version }} @@ -74,7 +73,7 @@ jobs: - name: Install Django and other Python dependencies run: | python -m pip install --upgrade pip - pip install django~=${{ matrix.django-version }} coveralls xapian*.whl + pip install django~=${{ matrix.django-version }} filelock~=${{ matrix.filelock-version }} coveralls xapian*.whl - name: Checkout django-haystack uses: actions/checkout@v2 diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 47026bb..c0fa0b6 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -6,6 +6,8 @@ Unreleased ---------- - Dropped support for Python 3.6. +- Fixed DatabaseLocked errors when running management commands with + multiple workers. v3.0.1 (2021-11-12) ------------------- diff --git a/README.rst b/README.rst index 2bf1ec7..581b0d4 100644 --- a/README.rst +++ b/README.rst @@ -92,6 +92,8 @@ The backend has the following optional settings: See `here `__ for more information about the different strategies. 
+- ``HAYSTACK_XAPIAN_USE_LOCKFILE``: Use a lockfile to prevent database locking errors when running management commands with multiple workers.
+  Defaults to `True`.
 
 Testing
 -------
diff --git a/requirements.txt b/requirements.txt
index 6c97213..fe8eb47 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1,3 @@
 Django>=2.2
 Django-Haystack>=3.0
+filelock>=3.4
diff --git a/setup.py b/setup.py
index 871773d..07f8944 100644
--- a/setup.py
+++ b/setup.py
@@ -28,5 +28,6 @@ setup(
     install_requires=[
         'django>=2.2',
         'django-haystack>=2.8.0',
+        'filelock>=3.4',
     ]
 )
diff --git a/tests/xapian_tests/tests/test_management_commands.py b/tests/xapian_tests/tests/test_management_commands.py
index da1ed2a..555b035 100644
--- a/tests/xapian_tests/tests/test_management_commands.py
+++ b/tests/xapian_tests/tests/test_management_commands.py
@@ -1,3 +1,5 @@
+import sys
+from io import StringIO
 from unittest import TestCase
 
 from django.core.management import call_command
@@ -82,3 +84,26 @@ class ManagementCommandTestCase(HaystackBackendTestCase, TestCase):
         # … but remove does:
         call_command("update_index", remove=True, verbosity=0)
         self.verify_indexed_document_count(self.NUM_BLOG_ENTRIES - 3)
+
+    def test_multiprocessing(self):
+        """Run update_index with 10 workers and check stderr for lock errors."""
+        self.verify_indexed_document_count(0)
+
+        # Swap sys.stderr for an in-memory buffer while the command runs.
+        # The finally clause restores the real stream even when
+        # call_command raises, so a failure here cannot leak the
+        # replacement stream into later tests.
+        old_stderr = sys.stderr
+        sys.stderr = StringIO()
+        try:
+            call_command(
+                "update_index",
+                verbosity=2,
+                workers=10,
+                batchsize=2,
+            )
+            err = sys.stderr.getvalue()
+        finally:
+            sys.stderr = old_stderr
+        print(err)
+        self.assertNotIn("xapian.DatabaseLockError", err)
+        self.verify_indexed_documents()
diff --git a/xapian_backend.py b/xapian_backend.py
index 0b15ec7..562ded3 100755
--- a/xapian_backend.py
+++ b/xapian_backend.py
@@ -1,5 +1,6 @@
 import datetime
 import pickle
+from pathlib import Path
 import os
 import re
 import shutil
@@ -8,6 +9,8 @@ import sys
 from django.conf import settings
 from django.core.exceptions import ImproperlyConfigured
 
+from filelock import FileLock
+
 from haystack import connections
 from 
haystack.backends import BaseEngine, BaseSearchBackend, BaseSearchQuery, SearchNode, log_query
 from haystack.constants import ID, DJANGO_ID, DJANGO_CT, DEFAULT_OPERATOR
@@ -73,6 +76,35 @@ INTEGER_FORMAT = '%012d'
 # texts with positional information
 TERMPOS_DISTANCE = 100
 
+
+def filelocked(func):
+    """Decorator to wrap a XapianSearchBackend method in a filelock.
+
+    The wrapped method runs while holding ``self.filelock`` unless
+    ``self.path`` equals ``MEMORY_DB_NAME`` or ``self.use_lockfile`` is
+    false, in which case it is called directly. The method's return
+    value is passed back to the caller in both cases.
+    """
+    # Function-scope import: keeps the decorator self-contained without
+    # requiring an extra module-level import.
+    from functools import wraps
+
+    @wraps(func)
+    def wrapper(self, *args, **kwargs):
+        """Run the function inside a lock and return its result."""
+        if self.path == MEMORY_DB_NAME or not self.use_lockfile:
+            return func(self, *args, **kwargs)
+
+        # Create the lock file and its parent directory if missing.
+        lockfile = Path(self.filelock.lock_file)
+        lockfile.parent.mkdir(parents=True, exist_ok=True)
+        lockfile.touch()
+        with self.filelock:
+            return func(self, *args, **kwargs)
+
+    return wrapper
+
+
 class InvalidIndexError(HaystackError):
     """Raised when an index can not be opened."""
     pass
@@ -168,6 +200,9 @@ class XapianSearchBackend(BaseSearchBackend):
 
         Also sets the stemming language to be used to `language`.
         """
+        self.use_lockfile = bool(
+            getattr(settings, 'HAYSTACK_XAPIAN_USE_LOCKFILE', True)
+        )
         super().__init__(connection_alias, **connection_options)
 
         if not 'PATH' in connection_options:
@@ -182,6 +217,10 @@ class XapianSearchBackend(BaseSearchBackend):
             except FileExistsError:
                 pass
 
+        if self.use_lockfile:
+            lockfile = Path(self.path) / "lockfile"
+            self.filelock = FileLock(lockfile)
+
         self.flags = connection_options.get('FLAGS', DEFAULT_XAPIAN_FLAGS)
         self.language = getattr(settings, 'HAYSTACK_XAPIAN_LANGUAGE', 'english')
 
@@ -225,6 +264,7 @@ class XapianSearchBackend(BaseSearchBackend):
             self._update_cache()
         return self._columns
 
+    @filelocked
     def update(self, index, iterable, commit=True):
         """
         Updates the `index` with any objects in `iterable` by adding/updating
@@ -476,6 +516,7 @@ class XapianSearchBackend(BaseSearchBackend):
         finally:
             database.close()
 
+    @filelocked
     def remove(self, obj, commit=True):
         """
         Remove indexes for `obj` from the database.