# This program shows how to use regular expressions in pandas to filter, clean, extract, and match text data.
import pandas as pd
# Sample records: each column holds free-form text used by one regex demo below.
data = {
    'email': ['alice@gmail.com', 'bob@yahoo.com', 'carol@gmail.com', 'dave@hotmail.com'],
    'phone': ['(123) 456-7890', '987-654-3210', '555 666 7777', '444.333.2222'],
    'name': ['Alice Smith', 'Bob Jones', 'Carol White', 'Dave Black'],
    'code': ['A123', 'B234', 'A999', 'C456'],
}
df = pd.DataFrame(data)


def _show(heading, result):
    """Print a section heading followed by the pandas object it introduces."""
    print(heading)
    print(result)


# Pull each column out once so the regex calls below read as one step each.
_emails = df['email']
_phones = df['phone']
_names = df['name']
_codes = df['code']

# Boolean mask: True where the address contains the literal "@gmail.com"
# (the dot is escaped so it is not treated as a wildcard).
gmail_filter = _emails.str.contains(r'@gmail\.com')

# Strip every non-digit character, leaving only the digits of each number.
clean_phones = _phones.str.replace(r'\D', '', regex=True)

# Capture the leading run of word characters; with a single capture group
# and the default expand setting this yields a one-column DataFrame.
first_names = _names.str.extract(r'^(\w+)')

# Boolean mask: True for codes made of "A" followed by exactly three digits.
code_match = _codes.str.match(r'^A\d{3}$')

# Report each result under its own heading, in the same order as computed.
_show('Rows with Gmail emails:', df.loc[gmail_filter])
_show('\nClean phone numbers:', clean_phones)
_show('\nFirst names extracted:', first_names)
_show('\nCodes matching pattern:', code_match)