A common use case for a search engine is keyword highlighting: notice how Google highlights the keywords the user searched for. That is what we achieve in the example below.
import whoosh,os | |
from whoosh import index | |
import whoosh.index | |
import whoosh.fields | |
import whoosh.qparser | |
import tornado.ioloop | |
import tornado.web | |
from whoosh import analysis, fields, formats, query, highlight | |
class Search(object):
    """Build a small demo Whoosh index on disk and return a searcher over it.

    Args:
        indexdir: Directory in which the index files are created.
        searchstr: Optional search string; stored on the instance but not
            used by ``searcher()``.
    """

    # Demo documents written into the index by ``searcher()``, in this order.
    # Two entries share the path "/tornado"; they are written with
    # ``add_document`` so both end up in the index.
    _SAMPLE_DOCUMENTS = (
        dict(title=u"Welcome", content=u"This is welcome blog!",
             path=u"/welcome", tag=u"Welcome", category=u"Welcome"),
        dict(title=u"Python Whoosh",
             content=u"Whoosh search library in pure Python",
             path=u"/whoosh", tag=u"whoosh", category=u"Search"),
        dict(title=u"Python Tornado",
             content=u"Tornado Web Server for real-time web apps",
             path=u"/tornado", tag=u"tornado", category=u"Web Server"),
        dict(title=u"Python Tornado Async",
             content=u"Tornado Web Server provides async web requests",
             path=u"/tornadoasync", tag=u"async", category=u"Web Server"),
        dict(title=u"Python Tornado Templates",
             content=u"Tornado Web Server has template feature",
             path=u"/tornadotemplates", tag=u"templates",
             category=u"Web Server"),
        dict(title=u"Python Tornado",
             content=u"Tornado Web Server is awesome",
             path=u"/tornado", tag=u"great", category=u"Web Server"),
    )

    def __init__(self, indexdir, searchstr=None):
        self.indexdir = indexdir
        self.searchstr = searchstr

    def searcher(self):
        """Create the index in ``self.indexdir`` and return a searcher.

        A fresh index is created with ``index.create_in`` on every call and
        populated with the sample documents.

        Returns:
            The searcher obtained from ``ix.searcher()``. The caller is
            responsible for closing it.

        Raises:
            OSError: If ``self.indexdir`` cannot be created and is not
                already a directory.
        """
        schema = whoosh.fields.Schema(
            path=whoosh.fields.ID(unique=True, stored=True),
            title=whoosh.fields.TEXT(stored=True, phrase=False),
            content=whoosh.fields.TEXT(stored=True, spelling=True),
            tag=whoosh.fields.TEXT(stored=True),
            category=whoosh.fields.TEXT(stored=True))

        # Try to create the directory instead of checking first: this avoids
        # the check-then-create race and also handles nested paths.
        try:
            os.makedirs(self.indexdir)
        except OSError:
            if not os.path.isdir(self.indexdir):
                raise

        ix = index.create_in(self.indexdir, schema)
        # The writer context manager commits on success and cancels on error,
        # so a failure while indexing does not leave the write lock held.
        with ix.writer() as writer:
            for document in self._SAMPLE_DOCUMENTS:
                writer.add_document(**document)
        return ix.searcher()
class Home(tornado.web.RequestHandler):
    """Handler for the root URL; responds with a fixed text message."""

    def get(self):
        """Write the fixed message as the response body."""
        message = 'It Works!'
        self.write(message)
class HighLighted(tornado.web.RequestHandler):
    """Search form handler that shows matching titles and highlighted content.

    GET renders the ``highlighted.html`` template. POST reads the ``qstring``
    form argument, searches the ``content`` field of the index built by
    ``Search('./indexer')`` and writes the titles and highlighted content
    excerpts of up to 10 hits.
    """

    def get(self):
        """Render the search form template."""
        self.render('highlighted.html')

    def post(self):
        """Run the search for ``qstring`` and write the highlighted hits."""
        from whoosh import qparser

        qstring = self.get_argument('qstring')
        srch = Search('./indexer')

        titles = []
        contents = []
        # The searcher is used as a context manager so it is closed even if
        # the search or highlighting raises.
        with srch.searcher() as s:
            # Parse the query through the schema so the text goes through the
            # same analyzer as the indexed content. A raw Term is matched
            # verbatim, so capitalised or multi-word input would never match.
            parser = qparser.QueryParser('content', schema=s.schema)
            results = s.search(parser.parse(qstring), limit=10)
            for hit in results:
                titles.append(hit["title"])
                # Highlights read stored fields, so build them while the
                # searcher is still open.
                contents.append(hit.highlights("content"))

        # Each entry is followed by a single space, as in the original output.
        titles_text = ''.join(title + ' ' for title in titles)
        contents_text = ''.join(content + ' ' for content in contents)
        self.write('Titles Hit: ' + titles_text + '<br /> ' + 'Contents Hit: ' + contents_text)
# URL routes: "/" is served by Home, "/highlighted" by HighLighted.
application = tornado.web.Application(
    [
        (r"/", Home),
        (r"/highlighted", HighLighted),
    ]
)

if __name__ == "__main__":
    # When run as a script, listen on port 7777 and start the I/O loop.
    application.listen(7777)
    tornado.ioloop.IOLoop.instance().start()
Search results for highlighted search