import numpy as np
# Toy word-embedding table: each word maps to a 3-d vector.
embeddings = {
    'king': np.array([0.5, 0.8, 0.1]),
    'queen': np.array([0.45, 0.85, 0.15]),
    'man': np.array([0.6, 0.7, 0.2]),
    'woman': np.array([0.55, 0.75, 0.25]),
    'apple': np.array([0.1, 0.2, 0.9]),
    'orange': np.array([0.15, 0.25, 0.85])
}
# Rescale every vector to unit length so that a plain dot product
# between two entries equals their cosine similarity.
embeddings = {word: vec / np.linalg.norm(vec) for word, vec in embeddings.items()}
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of *vec1* and *vec2*.

    Both vectors are assumed to be unit-normalized already (as the
    module's embeddings are), so the dot product alone is the cosine
    of the angle between them.
    """
    similarity = np.dot(vec1, vec2)
    return similarity
# Function to find most similar word to a vector
def most_similar(vec, embeddings, exclude=()):
    """Return the word in *embeddings* whose vector is most similar to *vec*.

    Args:
        vec: Query vector (assumed unit-normalized, like the embeddings).
        embeddings: Mapping of word -> unit-normalized vector.
        exclude: Iterable of words to skip (e.g. the words forming the query).

    Returns:
        The best-matching word, or None when the mapping is empty or
        every word is excluded.
    """
    # Immutable default (the original `exclude=[]` is the classic
    # mutable-default-argument pitfall); a set makes membership O(1).
    excluded = set(exclude)
    max_sim = -np.inf  # below the cosine range [-1, 1], so even a
    # similarity of exactly -1 is accepted (the original init of -1
    # would silently reject it and return None).
    best_word = None
    for word, emb in embeddings.items():
        if word in excluded:
            continue
        sim = cosine_similarity(vec, emb)
        if sim > max_sim:
            max_sim = sim
            best_word = word
    return best_word
# Analogy query: "king - man + woman" should land closest to 'queen'.
analogy_vec = embeddings['king'] - embeddings['man'] + embeddings['woman']
# Re-normalize after the arithmetic so the dot-product similarity stays a cosine.
analogy_vec = analogy_vec / np.linalg.norm(analogy_vec)
# Exclude the query words themselves so they cannot win the search.
result = most_similar(analogy_vec, embeddings, exclude=['king', 'man', 'woman'])
print(f"Analogy result for 'king - man + woman': {result}")
# Expected output: queen