# This program builds a simple pipeline that cleans data by removing rows with missing values and then calculates the mean of each column.
import pandas as pd
class AgentPipeline:
    """Run a sequence of single-argument callables as a chained pipeline.

    Each step receives the value returned by the previous step; the first
    step receives the value passed to ``run``.
    """

    def __init__(self, steps):
        """Store the pipeline steps.

        Args:
            steps: Iterable of callables. Each one takes a single argument
                and returns the value handed to the next step.
        """
        # Snapshot into a list: a one-shot iterable (generator, ``map``)
        # would otherwise be exhausted by the first ``run`` call, making
        # every later ``run`` silently return its input unchanged. The copy
        # also shields the pipeline from later edits to the caller's list.
        self.steps = list(steps)

    def run(self, data):
        """Pass ``data`` through every step in order and return the result.

        Args:
            data: Initial value given to the first step.

        Returns:
            The value returned by the last step, or ``data`` itself when
            the pipeline has no steps.
        """
        for step in self.steps:
            data = step(data)
        return data
# Step 1: Clean data by dropping missing values
def clean_data(data):
    """Return ``data`` with every row that has a missing value removed.

    Args:
        data: A pandas object exposing ``dropna`` (a ``DataFrame`` in this
            script).

    Returns:
        A new object without the incomplete rows; ``data`` itself is not
        modified because ``dropna`` is not called in place.
    """
    return data.dropna()
# Step 2: Analyze data by calculating mean of each column
def analyze_data(data):
    """Return the mean of each column of ``data``.

    Args:
        data: A pandas object exposing ``mean`` (a ``DataFrame`` in this
            script).

    Returns:
        The result of ``data.mean()``; for a ``DataFrame`` this is a
        ``Series`` indexed by column name.
    """
    return data.mean()
# Demo input: one missing value in each column, so only the first and last
# rows are complete.
raw_data = pd.DataFrame(
    data={
        'age': [25, 30, None, 22],
        'score': [88, None, 92, 85],
    }
)

# Chain the two stages: drop incomplete rows first, then average the rest.
pipeline = AgentPipeline(steps=[clean_data, analyze_data])

# Feed the sample frame through the pipeline and display the column means.
result = pipeline.run(raw_data)
print(result)