from gensim.models import Word2Vec
# Toy training corpus. Each entry is one sentence; splitting it on whitespace
# yields the list-of-tokens form that is handed to Word2Vec below.
raw_sentences = (
    'machine learning is fun',
    'deep learning models are powerful',
    'natural language processing is a part of ai',
    'word2vec creates word embeddings',
    'embeddings capture semantic meaning',
)
sentences = [text.split() for text in raw_sentences]

# Training hyperparameters, gathered in one mapping so they are easy to tune.
training_options = {
    'vector_size': 100,  # vector size of the embeddings
    'window': 5,         # window size
    'min_count': 1,      # include all words, even those seen only once
    'epochs': 50,        # number of training epochs
}
model = Word2Vec(sentences, **training_options)

# Quick check: similarity score between two words from the corpus.
word_vectors = model.wv
similarity = word_vectors.similarity('machine', 'learning')
print(f"Similarity between 'machine' and 'learning': {similarity:.2f}")

# Save the trained model so it can be reused later.
model.save('word2vec.model')