import gensim
from gensim.models import Word2Vec
# Toy corpus: each inner list is one pre-tokenized "sentence".
sentences = [
    ['king', 'queen', 'man', 'woman'],
    ['apple', 'orange', 'fruit', 'banana'],
    ['car', 'bus', 'train', 'vehicle'],
    ['dog', 'cat', 'pet', 'animal'],
    ['king', 'man', 'royal', 'crown'],
    ['queen', 'woman', 'royal', 'crown'],
]
# Train a skip-gram (sg=1) Word2Vec model with negative sampling.
# min_count=1 keeps every word since the corpus is tiny; many epochs
# compensate for the very small amount of training data.
model = Word2Vec(
    sentences,
    vector_size=50,
    window=3,
    min_count=1,
    sg=1,
    negative=5,
    epochs=100,
)
# Function to compute cosine similarity
from numpy import dot
from numpy.linalg import norm
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity between two vectors.

    Parameters
    ----------
    vec1, vec2 : array-like of float
        Vectors of equal length.

    Returns
    -------
    float
        dot(vec1, vec2) / (||vec1|| * ||vec2||), in [-1, 1].
        Returns 0.0 when either vector has zero norm (the unguarded
        division would otherwise produce nan or raise).
    """
    denom = norm(vec1) * norm(vec2)
    if denom == 0:
        # Cosine similarity is undefined for a zero vector; treat as
        # "no similarity" rather than propagating nan downstream.
        return 0.0
    return dot(vec1, vec2) / denom
# Contrast embedding similarity for semantically related vs unrelated pairs.
similar_pairs = [('king', 'queen'), ('apple', 'banana'), ('dog', 'cat')]
unrelated_pairs = [('king', 'apple'), ('car', 'dog'), ('fruit', 'train')]


def _score_pairs(word_pairs):
    # Cosine similarity of the trained embeddings for each (w1, w2) pair.
    return [cosine_similarity(model.wv[a], model.wv[b]) for a, b in word_pairs]


similar_scores = _score_pairs(similar_pairs)
unrelated_scores = _score_pairs(unrelated_pairs)
print('Similar pairs cosine similarity:', similar_scores)
print('Unrelated pairs cosine similarity:', unrelated_scores)