import gensim
from gensim import corpora
from gensim.models.ldamodel import LdaModel
from gensim.models.coherencemodel import CoherenceModel
# Sample preprocessed documents (list of token lists)
documents = [
    ['human', 'interface', 'computer'],
    ['survey', 'user', 'computer', 'system', 'response', 'time'],
    ['eps', 'user', 'interface', 'system'],
    ['system', 'human', 'system', 'eps'],
    ['user', 'response', 'time'],
    ['trees'],
    ['graph', 'trees'],
    ['graph', 'minors', 'trees'],
    ['graph', 'minors', 'survey'],
]

# LDA training parameters
num_topics = 3
passes = 20


def main():
    """Train an LDA model on ``documents`` and print its c_v coherence score.

    Builds a gensim dictionary and bag-of-words corpus from the module-level
    ``documents``, fits an ``LdaModel`` with ``num_topics`` topics over
    ``passes`` passes, then evaluates the model with a ``'c_v'``
    ``CoherenceModel`` and prints the score rounded to two decimals.
    """
    # Create dictionary and corpus
    id2word = corpora.Dictionary(documents)
    corpus = [id2word.doc2bow(doc) for doc in documents]

    # Train LDA model; random_state is pinned so repeated runs are comparable.
    lda_model = LdaModel(
        corpus=corpus,
        id2word=id2word,
        num_topics=num_topics,
        passes=passes,
        random_state=42,
    )

    # Compute coherence score
    coherence_model_lda = CoherenceModel(
        model=lda_model,
        texts=documents,
        dictionary=id2word,
        coherence='c_v',
    )
    coherence_lda = coherence_model_lda.get_coherence()
    print(f"Topic Coherence Score: {coherence_lda:.2f}")


# The guard is required, not cosmetic: CoherenceModel defaults to
# processes=-1 and may start multiprocessing workers for 'c_v'. Under the
# "spawn" start method (Windows, macOS) each worker re-imports this module;
# without the guard that would re-run training and try to spawn again,
# which fails with a RuntimeError during process bootstrapping.
if __name__ == "__main__":
    main()