Introduction
Tokenization breaks text into smaller pieces called tokens, like words or punctuation, so computers can understand and work with language.
Jump into concepts and practice - no test required
import spacy

# Load the English pipeline once; calling it on a string returns a Doc
# that can be iterated token by token.
nlp = spacy.load('en_core_web_sm')
doc = nlp('Your text here.')

# Print the text of each token on its own line.
for token in doc:
    print(token.text)
import spacy

# Load the English pipeline and run it over a short greeting.
nlp = spacy.load('en_core_web_sm')
doc = nlp('Hello, world!')

# Print the text of each token on its own line.
for token in doc:
    print(token.text)
# NOTE: this snippet does not load a pipeline itself; it assumes `nlp` was
# already created by an earlier snippet (nlp = spacy.load('en_core_web_sm')).
doc = nlp('I love AI.')

# Collect the text of every token into a plain list of strings.
tokens = [token.text for token in doc]
print(tokens)
# NOTE: this snippet does not load a pipeline itself; it assumes `nlp` was
# already created by an earlier snippet (nlp = spacy.load('en_core_web_sm')).
doc = nlp('SpaCy is great for NLP.')

# Index into the processed document to get a single token; print the
# text of the first one.
print(doc[0].text)
import spacy

# Load the English language model
nlp = spacy.load('en_core_web_sm')

# Text to tokenize
text = "Hello, spaCy! Let's tokenize this sentence."

# Process the text
doc = nlp(text)

# Print each token on a new line
for token in doc:
    print(token.text)
import spacy

# Load the English pipeline once and reuse it for both examples below;
# reloading the same model for the second example is unnecessary.
nlp = spacy.load('en_core_web_sm')

# Example 1: gather the token texts into a list and print the list.
doc = nlp('Hello, world!')
tokens = [token.text for token in doc]
print(tokens)

# Example 2: walk the tokens and print each one on its own line.
doc = nlp('Test sentence.')
for token in doc:
    print(token.text)