Introduction
Latent Dirichlet Allocation (LDA) helps find hidden topics in a collection of texts. It groups words that often appear together into topics, which indicate what the texts are about.
Jump into concepts and practice - no test required
# Syntax template for fitting an LDA model with scikit-learn.
# number_of_topics, seed and document_term_matrix are placeholders that this
# snippet does not define; substitute your own values before running it.
from sklearn.decomposition import LatentDirichletAllocation

lda = LatentDirichletAllocation(
    n_components=number_of_topics,
    random_state=seed,
)
lda.fit(document_term_matrix)
# Example: fit a 3-topic model with random_state fixed at 42.
# X is not defined in this snippet; presumably it is a document-term matrix
# built beforehand (e.g. with CountVectorizer).
lda = LatentDirichletAllocation(n_components=3, random_state=42)
lda.fit(X)
# Example: fit a 5-topic model. No random_state is passed here, and X is not
# defined in this snippet; presumably it is a document-term matrix built
# beforehand.
lda = LatentDirichletAllocation(n_components=5)
lda.fit(X)


# Complete example: vectorize five short documents, fit a 2-topic LDA model,
# and print the three highest-weighted words of each topic.
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.decomposition import LatentDirichletAllocation

# Sample documents
texts = [
    'I love reading about machine learning and AI.',
    'AI and machine learning are fascinating fields.',
    'The cat sat on the mat.',
    'Cats and dogs are common pets.',
    'I enjoy walking my dog in the park.',
]

# Convert texts to a matrix of token counts (English stop words are dropped)
vectorizer = CountVectorizer(stop_words='english')
X = vectorizer.fit_transform(texts)

# Create LDA model to find 2 topics; random_state=0 keeps runs reproducible
lda = LatentDirichletAllocation(n_components=2, random_state=0)
lda.fit(X)

# Show top words for each topic
n_top_words = 3
feature_names = vectorizer.get_feature_names_out()
for topic_idx, topic in enumerate(lda.components_):
    # argsort() is ascending, so reverse it and keep the first n_top_words
    # indices: these are the vocabulary positions with the largest weights.
    top_indices = topic.argsort()[::-1][:n_top_words]
    top_words = [feature_names[i] for i in top_indices]
    print(f"Topic {topic_idx + 1}: {', '.join(top_words)}")
print(ldamodel.print_topics(num_topics=2))?
# Quiz snippet: build a gensim LdaModel from a small hand-written corpus.
from gensim.models.ldamodel import LdaModel
# Two documents, each a list of (int, int) pairs. Presumably these are
# (token_id, count) pairs in gensim's bag-of-words format; confirm against
# the gensim documentation.
corpus = [[(0, 1), (1, 2)], [(0, 1), (2, 1)]]
# Integer id -> token string mapping, written as a plain Python dict literal.
dictionary = {0: 'apple', 1: 'banana', 2: 'cherry'}
# Two topics with random_state fixed at 42; the plain dict above is passed
# as id2word.
ldamodel = LdaModel(corpus=corpus, num_topics=2, id2word=dictionary, random_state=42)
print(ldamodel.print_topics(num_topics=2))
Running this code raises AttributeError: 'dict' object has no attribute 'token2id'. What is the likely cause?