This code demonstrates map, filter, and flatMap on a small list of numbers. It first multiplies each number by 3, then keeps only the results greater than 7, and finally expands each remaining number into two values: the number itself and the number plus one.
from pyspark.sql import SparkSession

# Demo of the three core RDD transformations: map, filter, flatMap.
# A local single-process Spark session is enough for this toy data set.
spark = SparkSession.builder.master('local').appName('MapFilterFlatMap').getOrCreate()

# Distribute a small sample of integers as an RDD.
numbers = spark.sparkContext.parallelize([1, 2, 3, 4, 5])

# map: one input element -> exactly one output element (here, tripled).
tripled = numbers.map(lambda n: n * 3)

# filter: keep only elements whose predicate is true (strictly above 7).
above_seven = tripled.filter(lambda n: n > 7)

# flatMap: one input element -> zero or more output elements,
# flattened into a single RDD (here, each n yields n and n + 1).
expanded = above_seven.flatMap(lambda n: (n, n + 1))

# collect() pulls each (lazy) RDD back to the driver for display;
# each call triggers evaluation of that RDD's lineage.
print('Original:', numbers.collect())
print('Mapped (x3):', tripled.collect())
print('Filtered (>7):', above_seven.collect())
print('FlatMapped (number and number+1):', expanded.collect())

# Release the Spark session's resources.
spark.stop()