Introduction
Latent Dirichlet Allocation (LDA) finds hidden topics in a collection of documents. Gensim lets you train an LDA model with just a few lines of Python code.
Jump into concepts and practice - no test required
# Basic syntax: build a dictionary and bag-of-words corpus from tokenized
# documents, train an LDA model on it, and read back the topics.
from gensim import corpora, models

# Prepare data: each document is a list of tokens.
texts = [['word1', 'word2'], ['word3', 'word4']]
dictionary = corpora.Dictionary(texts)
# One bag-of-words vector per document, built from the shared dictionary.
corpus = [dictionary.doc2bow(text) for text in texts]

# Train LDA model
lda_model = models.LdaModel(corpus, num_topics=2, id2word=dictionary, passes=10)

# Get topics
topics = lda_model.print_topics()
# Example: two short fruit documents, two topics, five training passes.
# Assumes `corpora` and `models` are already imported from gensim.
texts = [['apple', 'banana', 'apple'], ['banana', 'orange']]
dictionary = corpora.Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]
lda_model = models.LdaModel(corpus, num_topics=2, id2word=dictionary, passes=5)
topics = lda_model.print_topics()
# Example: three short animal documents, three topics, fifteen training passes.
# Assumes `corpora` and `models` are already imported from gensim.
texts = [['cat', 'dog'], ['dog', 'mouse'], ['cat', 'mouse', 'dog']]
dictionary = corpora.Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]
lda_model = models.LdaModel(corpus, num_topics=3, id2word=dictionary, passes=15)
topics = lda_model.print_topics()
# Complete example: train a 2-topic LDA model on nine tokenized sample
# documents and print each topic.
from gensim import corpora, models

# Sample documents (already tokenized)
texts = [
    ['human', 'interface', 'computer'],
    ['survey', 'user', 'computer', 'system', 'response', 'time'],
    ['eps', 'user', 'interface', 'system'],
    ['system', 'human', 'system', 'eps'],
    ['user', 'response', 'time'],
    ['trees'],
    ['graph', 'trees'],
    ['graph', 'minors', 'trees'],
    ['graph', 'minors', 'survey'],
]

# Create dictionary and corpus
dictionary = corpora.Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]

# Train LDA model
lda_model = models.LdaModel(corpus, num_topics=2, id2word=dictionary, passes=20)

# Print topics; passing -1 asks for every topic rather than a subset.
for idx, topic in lda_model.print_topics(-1):
    print(f"Topic {idx}: {topic}")
What is the output of `print(ldamodel.print_topics(num_topics=2))` after training an LDA model on the following texts?
# Quiz snippet: train a 2-topic LDA model on three fruit documents and
# print the topics.
import gensim
from gensim import corpora

texts = [['apple', 'banana', 'apple'], ['banana', 'orange'], ['apple', 'orange', 'banana']]
dictionary = corpora.Dictionary(texts)
corpus = [dictionary.doc2bow(text) for text in texts]
# random_state is fixed so repeated runs produce the same topics.
ldamodel = gensim.models.LdaModel(
    corpus, num_topics=2, id2word=dictionary, passes=10, random_state=42
)
print(ldamodel.print_topics(num_topics=2))
The following code raises AttributeError: 'LdaModel' object has no attribute 'show_topics'. What is the likely cause?
# Debugging-quiz snippet: train a 3-topic model, then list its topics.
# Assumes `gensim`, `corpus` and `dictionary` are defined as in the
# preceding example.
ldamodel = gensim.models.LdaModel(corpus, num_topics=3, id2word=dictionary)
print(ldamodel.show_topics())