Fixed #119 - Adds support for non-ASCII indexing and search.
This commit is contained in:
parent
e6d2350790
commit
ab26c30d90
|
@ -1,3 +1,5 @@
|
|||
from __future__ import unicode_literals
|
||||
|
||||
import datetime
|
||||
from django.test import TestCase
|
||||
|
||||
|
@ -7,6 +9,7 @@ from haystack.query import SearchQuerySet
|
|||
|
||||
from xapian_tests.models import Document
|
||||
from xapian_tests.search_indexes import DocumentIndex
|
||||
from xapian_tests.tests.test_xapian_backend import get_terms
|
||||
|
||||
|
||||
def pks(results):
|
||||
|
@ -25,7 +28,7 @@ class LiveXapianTestCase(TestCase):
|
|||
datetime.date(year=2010, month=2, day=1),
|
||||
datetime.date(year=2010, month=3, day=1)]
|
||||
|
||||
summaries = ['This is a huge summary',
|
||||
summaries = ['This is a huge corrup\xe7\xe3o summary',
|
||||
'This is a medium summary',
|
||||
'This is a small summary']
|
||||
|
||||
|
@ -119,8 +122,8 @@ class LiveXapianTestCase(TestCase):
|
|||
self.assertEqual(pks(self.queryset.filter(django_id__in=[2, 4])),
|
||||
pks(Document.objects.filter(id__in=[2, 4])))
|
||||
|
||||
self.assertEqual(pks(self.queryset.models(Document)),
|
||||
pks(Document.objects.all()))
|
||||
self.assertEqual(set(pks(self.queryset.models(Document))),
|
||||
set(pks(Document.objects.all())))
|
||||
|
||||
def test_field_startswith(self):
|
||||
self.assertEqual(len(self.queryset.filter(name__startswith='magaz')), 4)
|
||||
|
@ -134,20 +137,20 @@ class LiveXapianTestCase(TestCase):
|
|||
self.assertEqual(len(self.queryset.filter(name=AutoQuery("8 AND 4"))), 0)
|
||||
|
||||
def test_value_range(self):
|
||||
self.assertEqual(pks(self.queryset.filter(number__lt=3)),
|
||||
pks(Document.objects.filter(number__lt=3)))
|
||||
self.assertEqual(set(pks(self.queryset.filter(number__lt=3))),
|
||||
set(pks(Document.objects.filter(number__lt=3))))
|
||||
|
||||
self.assertEqual(pks(self.queryset.filter(django_id__gte=6)),
|
||||
pks(Document.objects.filter(id__gte=6)))
|
||||
self.assertEqual(set(pks(self.queryset.filter(django_id__gte=6))),
|
||||
set(pks(Document.objects.filter(id__gte=6))))
|
||||
|
||||
def test_date_range(self):
|
||||
date = datetime.date(year=2010, month=2, day=1)
|
||||
self.assertEqual(pks(self.queryset.filter(date__gte=date)),
|
||||
pks(Document.objects.filter(date__gte=date)))
|
||||
self.assertEqual(set(pks(self.queryset.filter(date__gte=date))),
|
||||
set(pks(Document.objects.filter(date__gte=date))))
|
||||
|
||||
date = datetime.date(year=2010, month=3, day=1)
|
||||
self.assertEqual(pks(self.queryset.filter(date__lte=date)),
|
||||
pks(Document.objects.filter(date__lte=date)))
|
||||
self.assertEqual(set(pks(self.queryset.filter(date__lte=date))),
|
||||
set(pks(Document.objects.filter(date__lte=date))))
|
||||
|
||||
def test_order_by(self):
|
||||
# private order
|
||||
|
@ -166,3 +169,9 @@ class LiveXapianTestCase(TestCase):
|
|||
self.assertEqual(pks(self.queryset.order_by("-date")),
|
||||
pks(Document.objects.order_by("-date")))
|
||||
|
||||
def test_non_ascii_search(self):
|
||||
"""
|
||||
Regression test for #119.
|
||||
"""
|
||||
self.assertEqual(pks(self.queryset.filter(content='corrup\xe7\xe3o')),
|
||||
pks(Document.objects.filter(summary__contains='corrup\xe7\xe3o')))
|
||||
|
|
|
@ -111,6 +111,7 @@ class XapianSimpleMockIndex(indexes.SearchIndex):
|
|||
text = indexes.CharField(document=True)
|
||||
author = indexes.CharField(model_attr='author')
|
||||
url = indexes.CharField()
|
||||
non_anscii = indexes.CharField()
|
||||
|
||||
datetime = indexes.DateTimeField(model_attr='pub_date')
|
||||
date = indexes.DateField()
|
||||
|
@ -131,6 +132,9 @@ class XapianSimpleMockIndex(indexes.SearchIndex):
|
|||
def prepare_url(self, obj):
|
||||
return 'http://example.com/1/'
|
||||
|
||||
def prepare_non_anscii(self, obj):
|
||||
return 'thsi sdas das corrup\xe7\xe3o das'
|
||||
|
||||
def prepare_datetime(self, obj):
|
||||
return datetime.datetime(2009, 2, 25, 1, 1, 1)
|
||||
|
||||
|
@ -275,6 +279,10 @@ class XapianBackendTestCase(HaystackBackendTestCase, TestCase):
|
|||
self.assertTrue('XDECIMAL_NUMBER22.34' in terms)
|
||||
self.assertFalse('ZXDECIMAL_NUMBER22.34' in terms)
|
||||
|
||||
def test_non_ascii_chars(self):
|
||||
terms = get_terms(self.backend, '-a')
|
||||
self.assertIn('corrup\xe7\xe3o', terms)
|
||||
|
||||
|
||||
class XapianSearchBackendTestCase(HaystackBackendTestCase, TestCase):
|
||||
|
||||
|
|
|
@ -1318,7 +1318,7 @@ class XapianSearchQuery(BaseSearchQuery):
|
|||
unstemmed_term = constructor.format(prefix=prefix, term=term)
|
||||
if stemmed:
|
||||
stem = xapian.Stem(self.backend.language)
|
||||
stemmed_term = 'Z' + constructor.format(prefix=prefix, term=stem(term))
|
||||
stemmed_term = 'Z' + constructor.format(prefix=prefix, term=stem(term).decode('utf-8'))
|
||||
|
||||
return xapian.Query(xapian.Query.OP_OR,
|
||||
xapian.Query(stemmed_term),
|
||||
|
|
Loading…
Reference in New Issue