Elasticsearch autocomplete on related keywords
Thu 21 April 2016 by Thejaswi Puthraya
Recently, I had to work on a client request to add autocomplete to their site's search, based not just on a specific list but also on related keywords.
For example, suppose you had a list of TV series like Simpsons, Futurama, Tom and Jerry, etc. The autocomplete had to suggest the series based on character names like Homer, Thomas, etc. as well.
The project piggy-backed on the elasticsearch-dsl python library.
The list of objects that had to be displayed in the autocomplete was saved in a Django model, and the related keywords were stored in a column (as JSON) in the same db table.
So every time the django model was saved, the autocomplete index would be built afresh.
Here's a snippet of this:
from elasticsearch_dsl import Index
from elasticsearch_dsl.connections import connections
from elasticsearch_dsl.mapping import Mapping
from .models import TVSeries
@receiver(post_save, sender=TVSeries)
def update_category_autosuggest(sender, **kwargs):
    """Rebuild the autosuggest index from scratch after a TVSeries is saved.

    The index named by ``settings.AUTOSUGGEST_INDEX`` is deleted, its mapping
    is recreated, and one document is indexed for every active series that
    has at least one keyword in its ``json_data``.

    Args:
        sender: The model class that sent the ``post_save`` signal.
        **kwargs: Remaining signal arguments; not used here.
    """
    try:
        es = connections.get_connection('default')
    except KeyError:
        # No 'default' connection has been registered yet; create it.
        es = connections.create_connection(
            'default',
            hosts=settings.ELASTICSEARCH_HOSTS
        )

    # settings.AUTOSUGGEST_INDEX is the name of the index
    index = settings.AUTOSUGGEST_INDEX
    # Drop the old index; ignore=404 is passed so that a not-yet-existing
    # index is presumably not treated as an error.
    Index(index).delete(ignore=404)

    mm = Mapping('some_mapping_name')
    # Creating an Elasticsearch schema with the fields and data types
    mm.field("series", "string")
    mm.field("keywords", "string", multi=True)
    mm.field(
        "autosuggest",
        "completion",
        index_analyzer="simple",
        search_analyzer="simple",
        payloads=True
    )
    mm.save(index)

    for series in TVSeries.objects.filter(is_active=True):
        # Example: {"keywords": ["homer simpson", "thomas", "tom", "bart simpson"]}
        keywords = json.loads(series.json_data).get("keywords", [])
        if not keywords:
            continue

        # Work on a copy: the suggest inputs get the series-name tokens
        # appended, and that must not leak into the stored "keywords" field.
        suggest_inputs = list(keywords)
        # Add the name of the TV series as well along
        # with the keywords (single-character tokens are skipped)
        suggest_inputs.extend(
            token.lower()
            for token in series.name.split(" ")
            if len(token) >= 2
        )

        data = {
            "series": series.name,
            "keywords": keywords,
            "autosuggest": {
                "input": suggest_inputs,
                "output": series.name,
                # There's a slug field for the TV Series which
                # references the correct URL of the series
                "payload": series.slug
            }
        }
        es.index(
            index=index,
            doc_type=mm.doc_type,
            body=data)
After the index is built, the search query needs to be handled. In the view:
from elasticsearch_dsl.connections import connections
from elasticsearch_dsl import Search
def series_autocomplete(request):
    """Return completion suggestions for the ``q`` GET parameter as JSON.

    The query is split on whitespace and each word is sent as a separate
    fuzzy completion-suggest request against the ``autosuggest`` field.
    The per-word suggestion entries are concatenated into a single list.

    Args:
        request: The incoming HTTP request; only ``request.GET["q"]`` is read.

    Returns:
        An ``HttpResponse`` with ``application/json`` content holding the list
        of suggestion entries (an empty list when the query is blank).
    """
    query = request.GET.get("q", "").strip()
    try:
        conn = connections.get_connection("default")
    except KeyError:
        # No 'default' connection has been registered yet; create it.
        conn = connections.create_connection(
            "default",
            hosts=settings.ELASTICSEARCH_HOSTS
        )

    search = Search(using=conn, index=settings.AUTOSUGGEST_INDEX)
    results = []
    # split() without an argument drops empty tokens, so a blank query or
    # repeated spaces never trigger a suggest request with empty text.
    for word in query.split():
        response = search.suggest(
            "suggestions",
            word,
            completion={
                "field": "autosuggest",
                "fuzzy": True
            }
        ).execute().to_dict()
        results.extend(response["suggest"]["suggestions"])

    return HttpResponse(
        json.dumps(results),
        content_type="application/json")