Elasticsearch autocomplete on related keywords

Thu 21 April 2016 by Thejaswi Puthraya

Recently, I had to work on a client request to add autocomplete to their site's search based not just on a specific list but on related keywords.

For example, suppose you had a list of TV series like Simpsons, Futurama, Tom and Jerry, etc. The autocomplete had to suggest a series based on character names like Homer, Thomas, etc. as well.

The project piggy-backed on the elasticsearch-dsl python library.

The list of objects that had to be displayed in the autocomplete was saved in a Django model, and the related keywords were stored in a column (as JSON) in the same db table.

So every time the django model was saved, the autocomplete index would be built afresh.

Here's a snippet of this:

from elasticsearch_dsl import Index
from elasticsearch_dsl.connections import connections
from elasticsearch_dsl.mapping import Mapping

from .models import TVSeries


@receiver(post_save, sender=TVSeries)
def update_category_autosuggest(sender, **kwargs):
    """Rebuild the autosuggest index from scratch after a TVSeries is saved.

    The existing index is deleted, the mapping is recreated, and one document
    is indexed for every active TVSeries that has at least one keyword.

    Args:
        sender: The model class that sent the signal (unused).
        **kwargs: Remaining signal arguments (unused).
    """
    try:
        es = connections.get_connection('default')
    except KeyError:
        es = connections.create_connection(
            'default',
            hosts=settings.ELASTICSEARCH_HOSTS
        )
    # settings.AUTOSUGGEST_INDEX is the name of the index
    index = settings.AUTOSUGGEST_INDEX
    # Drop the previous index so stale entries do not survive the rebuild.
    # ignore=404 is passed so that a missing index is not treated as an error.
    Index(index).delete(ignore=404)

    mm = Mapping('some_mapping_name')
    # Creating an Elasticsearch schema with the fields and data types
    mm.field("series", "string")
    mm.field("keywords", "string", multi=True)
    mm.field(
        "autosuggest",
        "completion",
        index_analyzer="simple",
        search_analyzer="simple",
        payloads=True
    )
    mm.save(index)

    for series in TVSeries.objects.filter(is_active=True):
        # Example: {"keywords": ["homer simpson", "thomas", "tom", "bart simpson"]}
        keywords = json.loads(series.json_data).get("keywords", [])
        if not keywords:
            continue
        # Work on a copy: the suggester input also gets the words of the
        # series name, and those must not leak into the "keywords" field.
        suggest_inputs = list(keywords)
        # Add the name of the TV series as well along with the keywords,
        # skipping tokens shorter than two characters.
        suggest_inputs.extend(
            word.lower()
            for word in series.name.split(" ")
            if len(word) >= 2
        )
        data = {
            "series": series.name,
            "keywords": keywords,
            "autosuggest": {
                "input": suggest_inputs,
                "output": series.name,
                # There's a slug field for the TV Series which
                # references the correct URL of the series
                "payload": series.slug
            }
        }
        es.index(
            index=index,
            doc_type=mm.doc_type,
            body=data)

After the index is built, the search query needs to be handled. In the view:

from elasticsearch_dsl.connections import connections
from elasticsearch_dsl import Search

def series_autocomplete(request):
    """Return completion suggestions for the ``q`` GET parameter as JSON.

    The query is split on whitespace and each word is sent to the
    "autosuggest" completion field with fuzzy matching enabled. The
    per-word suggestion entries are concatenated into a single list.

    Args:
        request: The incoming HTTP request; only ``request.GET["q"]`` is read.

    Returns:
        An HttpResponse whose body is the JSON-encoded list of suggestion
        entries (an empty list when the query is blank).
    """
    query = request.GET.get("q", "").strip()
    try:
        es = connections.get_connection("default")
    except KeyError:
        es = connections.create_connection(
            "default",
            hosts=settings.ELASTICSEARCH_HOSTS
        )
    # Pass the connection by keyword so the call does not depend on the
    # positional parameter order of Search.
    ss = Search(using=es, index=settings.AUTOSUGGEST_INDEX)
    results = []
    # split() with no argument drops empty tokens, so a blank query or
    # repeated spaces do not trigger suggest requests with empty text.
    for term in query.split():
        response = ss.suggest(
            "suggestions",
            term,
            completion={
                "field": "autosuggest",
                "fuzzy": True
            }
        ).execute().to_dict()
        results.extend(response["suggest"]["suggestions"])
    return HttpResponse(
        json.dumps(results),
        content_type="application/json")