import numpy as np
from sklearn.linear_model import LogisticRegression
# Vocabulary of the toy environment: every state and action the agent knows.
states = ['S0', 'S1', 'S2']
actions = ['a0', 'a1']
# Integer codes follow list position, so states[state_to_idx[s]] == s
# (and likewise for actions).
state_to_idx = dict(zip(states, range(len(states))))
action_to_idx = dict(zip(actions, range(len(actions))))
# Training data: (current_state, action) -> next_state
# Each entry is an observed (current_state, action, next_state) triple.
transitions = [
    ('S0', 'a0', 'S1'),
    ('S0', 'a1', 'S2'),
    ('S1', 'a0', 'S2'),
    ('S1', 'a1', 'S0'),
    ('S2', 'a0', 'S0'),
    ('S2', 'a1', 'S1'),
]


def _encode_pair(current_state, action):
    """Return the one-hot feature vector for a (state, action) pair.

    States and actions are categorical, and the next state depends on their
    combination. A linear classifier fed the raw integer codes
    ``[state_idx, action_idx]`` cannot represent the transition table above,
    so every (state, action) pair gets its own indicator feature instead.

    Args:
        current_state: A key of ``state_to_idx``.
        action: A key of ``action_to_idx``.

    Returns:
        A 1-D float array of length ``len(states) * len(actions)`` with a
        single 1.0 at the position assigned to this pair.

    Raises:
        KeyError: If the state or the action is not in the lookup tables.
    """
    vec = np.zeros(len(states) * len(actions))
    pair_idx = state_to_idx[current_state] * len(actions) + action_to_idx[action]
    vec[pair_idx] = 1.0
    return vec


# features: one-hot encoded (current_state, action) pair
X_train = np.array([_encode_pair(cs, ac) for cs, ac, _ in transitions])
# labels: integer code of next_state
y_train = np.array([state_to_idx[ns] for _, _, ns in transitions])

# Train logistic regression to predict next state.
# `multi_class` is deliberately not passed: it is deprecated since
# scikit-learn 1.5, and the default already fits a multinomial model when
# there are more than two classes.
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)


# Function to predict next state given current state and action
def predict_next_state(current_state, action):
    """Predict the state that follows ``current_state`` when ``action`` is taken.

    Args:
        current_state: A key of ``state_to_idx``.
        action: A key of ``action_to_idx``.

    Returns:
        The entry of ``states`` whose index the fitted ``model`` predicts.

    Raises:
        KeyError: If the state or the action is not in the lookup tables.
    """
    # model.predict expects a 2-D (n_samples, n_features) array.
    feature = _encode_pair(current_state, action).reshape(1, -1)
    pred_idx = model.predict(feature)[0]
    return states[pred_idx]
# Simulate agent performing a sequence of actions
def simulate_agent(start_state, action_sequence, *, transition_fn=None):
    """Roll a transition model forward and record every state visited.

    Args:
        start_state: State the agent begins in.
        action_sequence: Iterable of actions, applied in order.
        transition_fn: Optional callable ``(state, action) -> next_state``
            used to advance the agent. When omitted, the module-level
            ``predict_next_state`` is used.

    Returns:
        A list containing ``start_state`` followed by the state reached
        after each action, in order. An empty ``action_sequence`` yields
        ``[start_state]``.
    """
    if transition_fn is None:
        # Looked up at call time so the default always follows the
        # currently defined predictor.
        transition_fn = predict_next_state
    state = start_state
    states_visited = [state]
    for action in action_sequence:
        state = transition_fn(state, action)
        states_visited.append(state)
    return states_visited
# Example simulation
# Roll the agent forward from S0 through a fixed sequence of four actions.
start = 'S0'
actions_seq = ['a0', 'a1', 'a0', 'a1']
visited = simulate_agent(start, actions_seq)
# Success flag (1 or 0): the run counts as a success when it ends in 'S0'.
success = int(visited[-1] == 'S0')
print(f"States visited: {visited}")
print(f"Success: {success}")