import spacy
from spacy.training.example import Example
# Start from a blank English pipeline (tokenizer only, no trained components).
nlp = spacy.blank('en')

# Add an untrained named-entity recogniser to the pipeline.
ner = nlp.add_pipe('ner')

# Register every entity label that appears in the training annotations.
labels = ['PERSON', 'ORG', 'GPE', 'DATE', 'MONEY']
for label in labels:
    ner.add_label(label)

# Sample training data: (text, annotations) pairs. Each entity is a
# (start_char, end_char, label) triple where end_char is exclusive, and every
# span must line up with token boundaries or spaCy reports it as misaligned.
TRAIN_DATA = [
    (
        'Apple is looking at buying U.K. startup for $1 billion',
        {'entities': [(0, 5, 'ORG'), (27, 31, 'GPE'), (44, 54, 'MONEY')]},
    ),
    (
        'San Francisco considers banning sidewalk delivery robots',
        {'entities': [(0, 13, 'GPE')]},
    ),
    (
        'Barack Obama was born on August 4, 1961',
        # The DATE span ends at 39 so that it covers the full "1961" token;
        # an end of 38 would stop at "196" and cut the token in half.
        {'entities': [(0, 12, 'PERSON'), (25, 39, 'DATE')]},
    ),
]
# Temporarily disable every component except the NER so that only the NER
# weights are updated. select_pipes() is the spaCy v3 replacement for the
# deprecated disable_pipes().
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
with nlp.select_pipes(disable=other_pipes):
    # initialize() replaces the deprecated begin_training(); it sets up the
    # model weights and returns the optimizer used for the updates below.
    optimizer = nlp.initialize()
    for epoch in range(30):
        # Losses are accumulated per epoch, so start from an empty dict.
        losses = {}
        for text, annotations in TRAIN_DATA:
            # Tokenise only (no prediction) and pair with the gold annotations.
            doc = nlp.make_doc(text)
            example = Example.from_dict(doc, annotations)
            nlp.update([example], drop=0.2, sgd=optimizer, losses=losses)
        # Report progress once every five epochs, after the full pass.
        if epoch % 5 == 0:
            print(f'Epoch {epoch}, Losses: {losses}')
# Run the trained pipeline on a sentence it has not seen and print each
# recognised entity as a (text, label) pair.
test_text = (
    'Google was founded by Larry Page and Sergey Brin '
    'in California in 1998.'
)
doc = nlp(test_text)
entities = [(span.text, span.label_) for span in doc.ents]
print('Extracted Entities:', entities)