# This code fits LDA topic models with 2 and with 3 topics and prints the top 3 words
# for each topic, to help decide which number of topics makes more sense.
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer
# Sample documents: a tiny corpus mixing AI/machine-learning sentences with
# economy/stock-market sentences.
texts = [
    'I love reading about machine learning and AI.',
    'Deep learning is a part of machine learning.',
    'The economy is growing fast this year.',
    'Stock markets are unpredictable and volatile.',
    'AI can help improve healthcare and medicine.',
    'Investing in stocks requires knowledge of the market.',
]

# Convert texts to word counts, dropping English stop words.
vectorizer = CountVectorizer(stop_words='english')
data = vectorizer.fit_transform(texts)

# The vocabulary does not change after fitting, so fetch it once here rather
# than once per selected word inside the loops below.
feature_names = vectorizer.get_feature_names_out()

# Try different numbers of topics and print each model's topics for comparison.
for n_topics in [2, 3]:
    # A fixed random_state keeps the fitted topics reproducible between runs.
    model = LatentDirichletAllocation(n_components=n_topics, random_state=0)
    model.fit(data)
    print(f'Number of topics: {n_topics}')
    for idx, topic in enumerate(model.components_):
        # argsort() is ascending: take the last three indices and reverse them
        # so the highest-weighted word is listed first.
        top_words = [feature_names[i] for i in topic.argsort()[-3:][::-1]]
        print(f' Topic {idx+1}: {", ".join(top_words)}')
    # Blank line separating the output for each topic count.
    print()