Introduction
Removing punctuation and special characters helps clean text data so machines can understand the important words better.
Jump into concepts and practice - no test required
import re


def clean_text(text: str) -> str:
    """Return *text* with punctuation and special characters removed.

    Every character that is not an ASCII letter, an ASCII digit, or
    whitespace is deleted. Whitespace itself is left untouched, so a symbol
    that sat between two spaces leaves both spaces behind.
    """
    return re.sub(r'[^a-zA-Z0-9\s]', '', text)
# Example calls; the trailing comment on each line shows the returned string.
clean_text("Hello, world!") # Output: 'Hello world'
clean_text("Good morning :) #sunshine") # Output: 'Good morning  sunshine' (two spaces: only ':', ')' and '#' are removed)
clean_text("Price: $100.00") # Output: 'Price 10000'
import re


def clean_text(text: str) -> str:
    """Strip every character except ASCII letters, digits and whitespace."""
    return re.sub(r'[^a-zA-Z0-9\s]', '', text)


# Sample inputs: plain punctuation, an emoticon with a hashtag, a price,
# and an e-mail address.
texts = [
    "Hello, world!",
    "Good morning :) #sunshine",
    "Price: $100.00",
    "Email me at example@example.com!",
]

# Print each string before and after cleaning, separated by a blank line.
for t in texts:
    print(f"Original: {t}")
    print(f"Cleaned: {clean_text(t)}")
    print()
# (Question text, truncated in this source:)
# ... text = "Hello, world!" using regular expressions?
import re

text = "Hello, world! Let's clean: this text."
# [^\w\s] matches any character that is neither a word character (letter,
# digit, underscore) nor whitespace, i.e. punctuation and symbols.
# The pattern is a raw string, so each backslash is written once. With
# doubled backslashes (r'[^\\w\\s]') the class would mean "anything except
# a backslash, 'w' or 's'" and nearly the whole sentence would be deleted.
clean_text = re.sub(r'[^\w\s]', '', text)
print(clean_text)  # Hello world Lets clean this text
import re

text = "Good morning! How are you?"
# NOTE(review): [\w] matches word characters (letters, digits, underscore),
# so this call deletes the words and keeps the spaces and punctuation; the
# result is ' !   ?'. If the goal is to strip punctuation (the topic of this
# page), the class must be negated and must also keep whitespace:
# r'[^\w\s]'. Confirm whether this snippet is a deliberate "spot the bug"
# exercise before changing the pattern.
clean_text = re.sub(r'[\w]', '', text)
print(clean_text)