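The "before" code scans the full text of every document on each search, so query time grows with the total size of the collection. The "after" code builds an inverted index once, mapping each word to the set of documents that contain it; a search then looks up each query word and intersects the resulting sets, which is much faster when there are many documents. Note that the two versions match differently: the "before" code finds the query anywhere as a substring, while the "after" code matches whole whitespace-separated words only.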
### Before: Naive search scanning all documents
class SearchEngine:
    """Naive search engine that scans every document on each query.

    ``documents`` is a mapping of document id to document text. No
    preprocessing is done, so every call to :meth:`search` lowercases and
    inspects the full text of every document.
    """

    def __init__(self, documents):
        """Store the ``documents`` mapping (doc id -> text) for later scans."""
        self.documents = documents

    def search(self, query):
        """Return the ids of documents whose text contains ``query``.

        Matching is a case-insensitive *substring* test, so ``"lo w"``
        matches ``"Hello World"``. An empty query is a substring of every
        text and therefore matches every document.

        Returns:
            A list of document ids, in the iteration order of
            ``self.documents``.
        """
        # Lowercase the query once instead of once per document.
        needle = query.lower()
        return [
            doc_id
            for doc_id, text in self.documents.items()
            if needle in text.lower()
        ]
### After: Using inverted index for fast lookup
class SearchEngine:
    """Keyword search engine backed by an inverted index.

    ``documents`` is a mapping of document id to document text. At
    construction time every text is lowercased and split on whitespace, and
    each resulting word is mapped to the set of ids of the documents that
    contain it. Searches then only touch the sets for the query words
    instead of rescanning every document.

    The index is built once; changes made to ``documents`` afterwards are
    not reflected until the index is rebuilt.
    """

    def __init__(self, documents):
        """Store ``documents`` and build the inverted index from it."""
        self.documents = documents
        # Remember each document's position so search results can be
        # returned in document order without rescanning the collection.
        self._positions = {
            doc_id: position for position, doc_id in enumerate(documents)
        }
        self.index = self.build_index(documents)

    def build_index(self, documents):
        """Return an inverted index for ``documents``.

        Args:
            documents: Mapping of document id to document text.

        Returns:
            A dict mapping each lowercased, whitespace-delimited word to
            the set of ids of the documents containing that word.
        """
        index = {}
        for doc_id, text in documents.items():
            for word in text.lower().split():
                index.setdefault(word, set()).add(doc_id)
        return index

    def search(self, query):
        """Return the ids of documents containing every word of ``query``.

        The query is lowercased and split on whitespace; a document matches
        only if each query word appears in it as a whole word (tokens are
        not stripped of punctuation, mirroring how the index is built).

        Returns:
            A list of matching document ids ordered by their position in
            the ``documents`` mapping given at construction. The list is
            empty when the query has no words or any word is not indexed.
        """
        # Repeated words add nothing to an intersection, so look each up once.
        query_words = set(query.lower().split())
        if not query_words:
            return []

        postings = []
        for word in query_words:
            doc_ids = self.index.get(word)
            if not doc_ids:
                # A word no document contains makes the intersection empty.
                return []
            postings.append(doc_ids)

        # Start from the smallest set so intermediate results stay small.
        postings.sort(key=len)
        matches = set.intersection(*postings)

        # Sets have no defined order; sort by document position so results
        # are deterministic. Ids missing from the position table (e.g. the
        # index was replaced by hand) are placed last rather than failing.
        positions = self._positions
        unknown = len(positions)
        return sorted(
            matches, key=lambda doc_id: positions.get(doc_id, unknown)
        )